2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright 2005-2009 Novell, Inc (http://www.novell.com)
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation always provides zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issue more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list and critical_fin_list are part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "metadata/method-builder.h"
148 #include "metadata/profiler-private.h"
149 #include "utils/mono-mmap.h"
151 #ifdef HAVE_VALGRIND_MEMCHECK_H
152 #include <valgrind/memcheck.h>
155 #define OPDEF(a,b,c,d,e,f,g,h,i,j) \
159 #include "mono/cil/opcode.def"
166 * ######################################################################
167 * ######## Types and constants used by the GC.
168 * ######################################################################
170 #if SIZEOF_VOID_P == 4
171 typedef guint32 mword;
173 typedef guint64 mword;
176 static int gc_initialized = 0;
177 static int gc_debug_level = 0;
178 static FILE* gc_debug_file;
179 /* If set, do a minor collection before every allocation */
180 static gboolean collect_before_allocs = FALSE;
181 /* If set, do a heap consistency check before each minor collection */
182 static gboolean consistency_check_at_minor_collection = FALSE;
186 mono_gc_flush_info (void)
188 fflush (gc_debug_file);
192 #define MAX_DEBUG_LEVEL 8
193 #define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0)
/*
 * Wall-clock timing helpers built on gettimeofday ().
 *   TV_DECLARE(name)       declares a struct timeval called `name`.
 *   TV_GETTIME(tv)         stores the current time of day in `tv`.
 *   TV_ELAPSED(start,end)  microseconds from `start` to `end`, truncated to int.
 * Every use of a macro argument is parenthesized, and the whole expansion is
 * wrapped, so arbitrary lvalue expressions (e.g. *ptr or arr [i]) can be
 * passed and the result can be used inside larger expressions.
 */
#define TV_DECLARE(name) struct timeval name
#define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
#define TV_ELAPSED(start,end) ((int)((((end).tv_sec - (start).tv_sec) * 1000000) + (end).tv_usec - (start).tv_usec))
199 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
209 /* each request from the OS ends up in a GCMemSection */
210 typedef struct _GCMemSection GCMemSection;
211 struct _GCMemSection {
215 /* pointer where more data could be allocated if it fits */
219 * scan starts is an array of pointers to objects equally spaced in the allocation area
220 * They let us quickly find pinned objects from pinning pointers.
223 /* in major collections indexes in the pin_queue for objects that pin this section */
226 unsigned short num_scan_start;
230 /* large object space struct: 64+ KB */
231 /* we could make this limit much smaller to avoid memcpy copy
232 * and potentially have more room in the GC descriptor: need to measure
233 * This also means that such small OS objects will need to be
234 * allocated in a different way (using pinned chunks).
235 * We may want to put large but smaller than 64k objects in the fixed space
236 * when we move the object from one generation to another (to limit the
237 * pig in the snake effect).
238 * Note: it may be worth to have an optimized copy function, since we can
239 * assume that objects are aligned and have a multiple of 8 size.
240 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
241 * true if MONO_ZERO_LEN_ARRAY is nonzero.
243 typedef struct _LOSObject LOSObject;
246 mword size; /* this is the object size */
247 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
250 char data [MONO_ZERO_LEN_ARRAY];
253 /* Pinned objects are allocated in the LOS space if bigger than half a page
254 * or from freelists otherwise. We assume that pinned objects are relatively few
255 * and they have a slow dying speed (like interned strings, thread objects).
256 * As such they will be collected only at major collections.
257 * free lists are not global: when we need memory we allocate a PinnedChunk.
258 * Each pinned chunk is made of several pages, the first of which is used
259 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
260 * includes the freelists vectors and info about the object size of each page
261 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
262 * a size is assigned to it, the page is divided in the proper chunks and each
263 * chunk is added to the freelist. To not waste space, the remaining space in the
264 * first page is used as objects of size 16 or 32 (need to measure which are more
266 * We use this same structure to allocate memory used internally by the GC, so
267 * we never use malloc/free if we need to alloc during collection: the world is stopped
268 * and malloc/free will deadlock.
269 * When we want to iterate over pinned objects, we just scan a page at a time
270 * linearly according to the size of objects in the page: the next pointer used to link
271 * the items in the freelist uses the same word as the vtable. Since we keep freelists
272 * for each pinned chunk, if the word points outside the pinned chunk it means
274 * We could avoid this expensive scanning in creative ways. We could have a policy
275 * of putting in the pinned space only objects we know about that have no struct fields
276 * with references and we can easily use even an expensive write barrier for them,
277 * since pointer writes on such objects should be rare.
278 * The best compromise is to just alloc interned strings and System.MonoType in them.
279 * It would be nice to allocate MonoThread in it, too: must check that we properly
280 * use write barriers so we don't have to do any expensive scanning of the whole pinned
281 * chunk list during minor collections. We can avoid it now because we alloc in it only
282 * reference-free objects.
284 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
285 #define MAX_FREELIST_SIZE 2048
286 #define PINNED_PAGE_SIZE (4096)
287 #define PINNED_CHUNK_MIN_SIZE (4096*8)
288 typedef struct _PinnedChunk PinnedChunk;
289 struct _PinnedChunk {
292 int *page_sizes; /* a 0 means the page is still unused */
295 void *data [1]; /* page sizes and free lists are stored here */
298 /* The method used to clear the nursery */
299 /* Clearing at nursery collections is the safest, but has bad interactions with caches.
300 * Clearing at TLAB creation is much faster, but more complex and it might expose hard
305 CLEAR_AT_TLAB_CREATION
306 } NurseryClearPolicy;
308 static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
311 * If this is set, the nursery is aligned to an address aligned to its size, ie.
312 * a 1MB nursery will be aligned to an address divisible by 1MB. This allows us to
313 * speed up ptr_in_nursery () checks which are very frequent. This requires the
314 * nursery size to be a compile time constant.
316 #define ALIGN_NURSERY 1
319 * The young generation is divided into fragments. This is because
320 * we can hand one fragment to a thread for lock-less fast alloc and
321 * because the young generation ends up fragmented anyway by pinned objects.
322 * Once a collection is done, a list of fragments is created. When doing
323 * thread local alloc we use smallish nurseries so we allow new threads to
324 * allocate memory from gen0 without triggering a collection. Threads that
325 * are found to allocate lots of memory are given bigger fragments. This
326 * should make the finalizer thread use little nursery memory after a while.
327 * We should start assigning threads very small fragments: if there are many
328 * threads the nursery will be full of reserved space that the threads may not
329 * use at all, slowing down allocation speed.
330 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
331 * Allocation Buffers (TLABs).
333 typedef struct _Fragment Fragment;
337 char *fragment_start;
338 char *fragment_limit; /* the current soft limit for allocation */
342 /* the runtime can register areas of memory as roots: we keep two lists of roots,
343 * a pinned root set for conservatively scanned roots and a normal one for
344 * precisely scanned roots (currently implemented as a single list).
346 typedef struct _RootRecord RootRecord;
354 /* for use with write barriers */
355 typedef struct _RememberedSet RememberedSet;
356 struct _RememberedSet {
360 mword data [MONO_ZERO_LEN_ARRAY];
363 /* we have 4 possible values in the low 2 bits */
365 REMSET_LOCATION, /* just a pointer to the exact location */
366 REMSET_RANGE, /* range of pointer fields */
367 REMSET_OBJECT, /* mark all the object for scanning */
368 REMSET_OTHER, /* all others */
369 REMSET_TYPE_MASK = 0x3
372 /* Subtypes of REMSET_OTHER */
374 REMSET_VTYPE, /* a valuetype described by a gc descriptor */
375 REMSET_ROOT_LOCATION, /* a location inside a root */
378 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
379 static pthread_key_t remembered_set_key;
380 static RememberedSet *global_remset;
381 //static int store_to_global_remset = 0;
383 /* FIXME: later choose a size that takes into account the RememberedSet struct
384 * and doesn't waste any alloc padding space.
386 #define DEFAULT_REMSET_SIZE 1024
387 static RememberedSet* alloc_remset (int size, gpointer id);
389 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
390 * no cast from a pointer to an integer
397 /* these bits are set in the object vtable: we could merge them since an object can be
398 * either pinned or forwarded but not both.
399 * We store them in the vtable slot because the bits are used in the sync block for
400 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
401 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
402 * would be an invalid combination for the monitor and hash code).
403 * The values are already shifted.
404 * The forwarding address is stored in the sync block.
406 #define FORWARDED_BIT 1
408 #define VTABLE_BITS_MASK 0x3
410 /* returns NULL if not forwarded, or the forwarded address */
411 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
412 /* set the forwarded address fw_addr for object obj */
413 #define forward_object(obj,fw_addr) do { \
414 ((mword*)(obj))[0] |= FORWARDED_BIT; \
415 ((mword*)(obj))[1] = (mword)(fw_addr); \
418 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
419 #define pin_object(obj) do { \
420 ((mword*)(obj))[0] |= PINNED_BIT; \
422 #define unpin_object(obj) do { \
423 ((mword*)(obj))[0] &= ~PINNED_BIT; \
427 #define ptr_in_nursery(ptr) (((mword)(ptr) & ~((1 << DEFAULT_NURSERY_BITS) - 1)) == (mword)nursery_start)
429 #define ptr_in_nursery(ptr) ((char*)(ptr) >= nursery_start && (char*)(ptr) < nursery_real_end)
433 * Since we set bits in the vtable, use the macro to load it from the pointer to
434 * an object that is potentially pinned.
436 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
439 safe_name (void* obj)
441 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
442 return vt->klass->name;
446 safe_object_get_size (MonoObject* o)
448 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
449 if (klass == mono_defaults.string_class) {
450 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
451 } else if (klass->rank) {
452 MonoArray *array = (MonoArray*)o;
453 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
454 if (G_UNLIKELY (array->bounds)) {
457 size += sizeof (MonoArrayBounds) * klass->rank;
461 /* from a created object: the class must be inited already */
462 return klass->instance_size;
466 static inline gboolean
467 is_maybe_half_constructed (MonoObject *o)
471 klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
472 if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) ||
473 (klass->rank && mono_array_length ((MonoArray*)o) == 0))
480 * ######################################################################
481 * ######## Global data.
482 * ######################################################################
484 static LOCK_DECLARE (gc_mutex);
485 static int gc_disabled = 0;
486 static int num_minor_gcs = 0;
487 static int num_major_gcs = 0;
489 /* good sizes are 512KB-1MB: larger ones increase memzeroing time a lot */
490 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
491 #define DEFAULT_NURSERY_SIZE (1024*512*2)
492 /* The number of trailing 0 bits in DEFAULT_NURSERY_SIZE */
493 #define DEFAULT_NURSERY_BITS 20
494 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
495 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
496 /* to quickly find the head of an object pinned by a conservative address
497 * we keep track of the objects allocated for each SCAN_START_SIZE memory
498 * chunk in the nursery or other memory sections. Larger values have less
499 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
501 #define SCAN_START_SIZE (4096*2)
502 /* the minimum size of a fragment that we consider useful for allocation */
503 #define FRAGMENT_MIN_SIZE (512)
504 /* This is a fixed value used for pinned chunks, not the system pagesize */
505 #define FREELIST_PAGESIZE 4096
507 static mword pagesize = 4096;
508 static mword nursery_size = DEFAULT_NURSERY_SIZE;
509 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
510 static mword max_section_size = DEFAULT_MAX_SECTION;
511 static int section_size_used = 0;
512 static int degraded_mode = 0;
514 static LOSObject *los_object_list = NULL;
515 static mword los_memory_usage = 0;
516 static mword los_num_objects = 0;
517 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
518 static mword total_alloc = 0;
519 /* use this to tune when to do a major/minor collection */
520 static mword memory_pressure = 0;
522 static GCMemSection *section_list = NULL;
523 static GCMemSection *nursery_section = NULL;
524 static mword lowest_heap_address = ~(mword)0;
525 static mword highest_heap_address = 0;
527 typedef struct _FinalizeEntry FinalizeEntry;
528 struct _FinalizeEntry {
533 typedef struct _DisappearingLink DisappearingLink;
534 struct _DisappearingLink {
535 DisappearingLink *next;
540 * The link pointer is hidden by negating each bit. We use the lowest
541 * bit of the link (before negation) to store whether it needs
542 * resurrection tracking.
544 #define HIDE_POINTER(p,t) ((gpointer)(~((gulong)(p)|((t)?1:0))))
545 #define REVEAL_POINTER(p) ((gpointer)((~(gulong)(p))&~3L))
547 #define DISLINK_OBJECT(d) (REVEAL_POINTER (*(d)->link))
548 #define DISLINK_TRACK(d) ((~(gulong)(*(d)->link)) & 1)
551 * The finalizable hash has the object as the key, the
552 * disappearing_link hash, has the link address as key.
554 static FinalizeEntry **finalizable_hash = NULL;
555 /* objects that are ready to be finalized */
556 static FinalizeEntry *fin_ready_list = NULL;
557 static FinalizeEntry *critical_fin_list = NULL;
558 static DisappearingLink **disappearing_link_hash = NULL;
559 static mword disappearing_link_hash_size = 0;
560 static mword finalizable_hash_size = 0;
562 static int num_registered_finalizers = 0;
563 static int num_ready_finalizers = 0;
564 static int num_disappearing_links = 0;
565 static int no_finalize = 0;
567 /* keep each size a multiple of ALLOC_ALIGN */
568 /* on 64 bit systems 8 is likely completely unused. */
569 static const int freelist_sizes [] = {
570 8, 16, 24, 32, 40, 48, 64, 80,
571 96, 128, 160, 192, 224, 256, 320, 384,
572 448, 512, 584, 680, 816, 1024, 1360, 2048};
573 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
575 static char* max_pinned_chunk_addr = NULL;
576 static char* min_pinned_chunk_addr = (char*)-1;
577 /* pinned_chunk_list is used for allocations of objects that are never moved */
578 static PinnedChunk *pinned_chunk_list = NULL;
579 /* internal_chunk_list is used for allocating structures needed by the GC */
580 static PinnedChunk *internal_chunk_list = NULL;
583 obj_is_from_pinned_alloc (char *p)
585 PinnedChunk *chunk = pinned_chunk_list;
586 for (; chunk; chunk = chunk->next) {
587 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
593 static int slot_for_size (size_t size);
596 free_pinned_object (PinnedChunk *chunk, char *obj, size_t size)
598 void **p = (void**)obj;
599 int slot = slot_for_size (size);
601 g_assert (obj >= (char*)chunk->start_data && obj < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE));
602 *p = chunk->free_list [slot];
603 chunk->free_list [slot] = p;
607 ROOT_TYPE_NORMAL = 0, /* "normal" roots */
608 ROOT_TYPE_PINNED = 1, /* roots without a GC descriptor */
609 ROOT_TYPE_WBARRIER = 2, /* roots with a write barrier */
613 /* registered roots: the key to the hash is the root start address */
615 * Different kinds of roots are kept separate to speed up pin_from_roots () for example.
617 static RootRecord **roots_hash [ROOT_TYPE_NUM] = { NULL, NULL };
618 static int roots_hash_size [ROOT_TYPE_NUM] = { 0, 0, 0 };
619 static mword roots_size = 0; /* amount of memory in the root set */
620 static int num_roots_entries [ROOT_TYPE_NUM] = { 0, 0, 0 };
623 * The current allocation cursors
624 * We allocate objects in the nursery.
625 * The nursery is the area between nursery_start and nursery_real_end.
626 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
627 * from nursery fragments.
628 * tlab_next is the pointer to the space inside the TLAB where the next object will
630 * tlab_temp_end is the pointer to the end of the temporary space reserved for
631 * the allocation: it allows us to set the scan starts at reasonable intervals.
632 * tlab_real_end points to the end of the TLAB.
633 * nursery_frag_real_end points to the end of the currently used nursery fragment.
634 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
635 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
636 * At the next allocation, the area of the nursery where objects can be present is
637 * between MIN(nursery_first_pinned_start, first_fragment_start) and
638 * MAX(nursery_last_pinned_end, nursery_frag_real_end)
640 static char *nursery_start = NULL;
643 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
644 * variables for next+temp_end ?
646 static __thread char *tlab_start;
647 static __thread char *tlab_next;
648 static __thread char *tlab_temp_end;
649 static __thread char *tlab_real_end;
650 /* Used by the managed allocator */
651 static __thread char **tlab_next_addr;
652 static char *nursery_next = NULL;
653 static char *nursery_frag_real_end = NULL;
654 static char *nursery_real_end = NULL;
655 //static char *nursery_first_pinned_start = NULL;
656 static char *nursery_last_pinned_end = NULL;
658 /* The size of a TLAB */
659 /* The bigger the value, the less often we have to go to the slow path to allocate a new
660 * one, but the more space is wasted by threads not allocating much memory.
662 * FIXME: Make this self-tuning for each thread.
664 static guint32 tlab_size = (1024 * 4);
666 /* fragments that are free and ready to be used for allocation */
667 static Fragment *nursery_fragments = NULL;
668 /* freelist of fragment structures */
669 static Fragment *fragment_freelist = NULL;
672 * used when moving the objects
673 * When the nursery is collected, objects are copied to to_space.
674 * The area between gray_first and gray_objects is used as a stack
675 * of objects that need their fields checked for more references
677 * We should optimize somehow this mechanism to avoid rescanning
678 * ptr-free objects. The order is also probably not optimal: need to
679 * test cache misses and other graph traversal orders.
681 static char *to_space = NULL;
682 static char *gray_first = NULL;
683 static char *gray_objects = NULL;
684 static char *to_space_end = NULL;
685 static GCMemSection *to_space_section = NULL;
687 /* objects bigger than this go into the large object space */
688 #define MAX_SMALL_OBJ_SIZE 0xffff
690 /* Functions supplied by the runtime to be called by the GC */
691 static MonoGCCallbacks gc_callbacks;
694 * ######################################################################
695 * ######## Macros and function declarations.
696 * ######################################################################
699 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
700 if ((mword)(low) < lowest_heap_address) \
701 lowest_heap_address = (mword)(low); \
702 if ((mword)(high) > highest_heap_address) \
703 highest_heap_address = (mword)(high); \
707 align_pointer (void *ptr)
709 mword p = (mword)ptr;
710 p += sizeof (gpointer) - 1;
711 p &= ~ (sizeof (gpointer) - 1);
715 /* forward declarations */
716 static void* get_internal_mem (size_t size);
717 static void free_internal_mem (void *addr);
718 static void* get_os_memory (size_t size, int activate);
719 static void free_os_memory (void *addr, size_t size);
720 static G_GNUC_UNUSED void report_internal_mem_usage (void);
722 static int stop_world (void);
723 static int restart_world (void);
724 static void scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise);
725 static void scan_from_remsets (void *start_nursery, void *end_nursery);
726 static void find_pinning_ref_from_thread (char *obj, size_t size);
727 static void update_current_thread_stack (void *start);
728 static GCMemSection* alloc_section (size_t size);
729 static void finalize_in_range (char *start, char *end);
730 static void null_link_in_range (char *start, char *end);
731 static void null_links_for_domain (MonoDomain *domain);
732 static gboolean search_fragment_for_size (size_t size);
733 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
734 static void clear_remsets (void);
735 static void clear_tlabs (void);
736 static char *find_tlab_next_from_address (char *addr);
737 static void scan_pinned_objects (void (*callback) (PinnedChunk*, char*, size_t, void*), void *callback_data);
738 static void sweep_pinned_objects (void);
739 static void scan_from_pinned_objects (char *addr_start, char *addr_end);
740 static void free_large_object (LOSObject *obj);
741 static void free_mem_section (GCMemSection *section);
743 void describe_ptr (char *ptr);
744 void check_consistency (void);
745 char* check_object (char *start);
748 * ######################################################################
749 * ######## GC descriptors
750 * ######################################################################
751 * Used to quickly get the info the GC needs about an object: size and
752 * where the references are held.
754 /* objects are aligned to 8 bytes boundaries
755 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
756 * The low 3 bits define the type of the descriptor. The other bits
757 * depend on the type.
758 * As a general rule the 13 remaining low bits define the size, either
759 * of the whole object or of the elements in the arrays. While for objects
760 * the size is already in bytes, for arrays we need to shift, because
761 * array elements might be smaller than 8 bytes. In case of arrays, we
762 * use two bits to describe what the additional high bits represent,
763 * so the default behaviour can handle element sizes less than 2048 bytes.
764 * If the high 16 bits are 0, the object is pointer-free.
765 * This design should make it easy and fast to skip over ptr-free data.
766 * The first 4 types should cover >95% of the objects.
767 * Note that since the size of objects is limited to 64K, larger objects
768 * will be allocated in the large object heap.
769 * If we want 4-bytes alignment, we need to put vector and small bitmap
773 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
774 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
775 DESC_TYPE_STRING, /* nothing */
776 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
777 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
778 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
779 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
780 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
781 /* subtypes for arrays and vectors */
782 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
783 DESC_TYPE_V_REFS, /* all the array elements are refs */
784 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
785 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
788 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
789 #define LOW_TYPE_BITS 3
790 #define SMALL_BITMAP_SHIFT 16
791 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
792 #define VECTOR_INFO_SHIFT 14
793 #define VECTOR_ELSIZE_SHIFT 3
794 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
795 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
796 #define SMALL_SIZE_MASK 0xfff8
797 #define MAX_ELEMENT_SIZE 0x3ff
798 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
799 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
800 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
801 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
802 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
804 #define ALLOC_ALIGN 8
807 /* Root bitmap descriptors are simpler: the lower three bits describe the type
808 * and we either have 30/62 bitmap bits or nibble-based run-length,
809 * or a complex descriptor, or a user defined marker function.
812 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
817 ROOT_DESC_TYPE_MASK = 0x7,
818 ROOT_DESC_TYPE_SHIFT = 3,
821 #define MAKE_ROOT_DESC(type,val) ((type) | ((val) << ROOT_DESC_TYPE_SHIFT))
823 #define MAX_USER_DESCRIPTORS 16
825 static gsize* complex_descriptors = NULL;
826 static int complex_descriptors_size = 0;
827 static int complex_descriptors_next = 0;
828 static MonoGCMarkFunc user_descriptors [MAX_USER_DESCRIPTORS];
829 static int user_descriptors_next = 0;
832 alloc_complex_descriptor (gsize *bitmap, int numbits)
834 int nwords = numbits/GC_BITS_PER_WORD + 2;
839 res = complex_descriptors_next;
840 /* linear search, so we don't have duplicates with domain load/unload
841 * this should not be performance critical or we'd have bigger issues
842 * (the number and size of complex descriptors should be small).
844 for (i = 0; i < complex_descriptors_next; ) {
845 if (complex_descriptors [i] == nwords) {
847 for (j = 0; j < nwords - 1; ++j) {
848 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
858 i += complex_descriptors [i];
860 if (complex_descriptors_next + nwords > complex_descriptors_size) {
861 int new_size = complex_descriptors_size * 2 + nwords;
862 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
863 complex_descriptors_size = new_size;
865 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
866 complex_descriptors_next += nwords;
867 complex_descriptors [res] = nwords;
868 for (i = 0; i < nwords - 1; ++i) {
869 complex_descriptors [res + 1 + i] = bitmap [i];
870 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
877 * Descriptor builders.
880 mono_gc_make_descr_for_string (gsize *bitmap, int numbits)
882 return (void*) DESC_TYPE_STRING;
886 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
888 int first_set = -1, num_set = 0, last_set = -1, i;
890 size_t stored_size = obj_size;
891 stored_size += ALLOC_ALIGN - 1;
892 stored_size &= ~(ALLOC_ALIGN - 1);
893 for (i = 0; i < numbits; ++i) {
894 if (bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
901 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
902 /* check run-length encoding first: one byte offset, one byte number of pointers
903 * on 64 bit archs, we can have 3 runs, just one on 32.
904 * It may be better to use nibbles.
907 desc = DESC_TYPE_RUN_LENGTH | stored_size;
908 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size));
910 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
911 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
912 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
915 /* we know the 2-word header is ptr-free */
916 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
917 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
918 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
922 /* we know the 2-word header is ptr-free */
923 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
924 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
925 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
928 /* it's a complex object ... */
929 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
933 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
935 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
937 int first_set = -1, num_set = 0, last_set = -1, i;
938 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
939 for (i = 0; i < numbits; ++i) {
940 if (elem_bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
947 if (elem_size <= MAX_ELEMENT_SIZE) {
948 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
950 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
952 /* Note: we also handle structs with just ref fields */
953 if (num_set * sizeof (gpointer) == elem_size) {
954 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16));
956 /* FIXME: try run-len first */
957 /* Note: we can't skip the object header here, because it's not present */
958 if (last_set <= SMALL_BITMAP_SIZE) {
959 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
962 /* it's am array of complex structs ... */
963 desc = DESC_TYPE_COMPLEX_ARR;
964 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
968 /* Return the bitmap encoded by a descriptor */
970 mono_gc_get_bitmap_for_descr (void *descr, int *numbits)
972 mword d = (mword)descr;
976 case DESC_TYPE_RUN_LENGTH: {
977 int first_set = (d >> 16) & 0xff;
978 int num_set = (d >> 16) & 0xff;
981 bitmap = g_new0 (gsize, (first_set + num_set + 7) / 8);
983 for (i = first_set; i < first_set + num_set; ++i)
984 bitmap [i / GC_BITS_PER_WORD] |= ((gsize)1 << (i % GC_BITS_PER_WORD));
986 *numbits = first_set + num_set;
990 case DESC_TYPE_SMALL_BITMAP:
991 bitmap = g_new0 (gsize, 1);
993 bitmap [0] = (d >> SMALL_BITMAP_SHIFT) << OBJECT_HEADER_WORDS;
995 *numbits = GC_BITS_PER_WORD;
999 g_assert_not_reached ();
1003 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
1004 #define STRING_SIZE(size,str) do { \
1005 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
1006 (size) += (ALLOC_ALIGN - 1); \
1007 (size) &= ~(ALLOC_ALIGN - 1); \
1010 #define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \
1011 (size) = (desc) & 0xfff8; \
1014 #define OBJ_BITMAP_SIZE(size,desc,obj) do { \
1015 (size) = (desc) & 0xfff8; \
1018 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
1019 #define PREFETCH(addr)
1021 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
1022 #define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \
1023 if ((desc) & 0xffff0000) { \
1024 /* there are pointers */ \
1025 void **_objptr_end; \
1026 void **_objptr = (void**)(obj); \
1027 _objptr += ((desc) >> 16) & 0xff; \
1028 _objptr_end = _objptr + (((desc) >> 24) & 0xff); \
1029 while (_objptr < _objptr_end) { \
1030 HANDLE_PTR (_objptr, (obj)); \
1036 /* a bitmap desc means that there are pointer references or we'd have
1037 * chosen run-length, instead: add an assert to check.
1039 #define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \
1040 /* there are pointers */ \
1041 void **_objptr = (void**)(obj); \
1042 gsize _bmap = (desc) >> 16; \
1043 _objptr += OBJECT_HEADER_WORDS; \
1045 if ((_bmap & 1)) { \
1046 HANDLE_PTR (_objptr, (obj)); \
1053 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
1054 /* there are pointers */ \
1055 void **_objptr = (void**)(obj); \
1056 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
1057 _objptr += OBJECT_HEADER_WORDS; \
1059 if ((_bmap & 1)) { \
1060 HANDLE_PTR (_objptr, (obj)); \
1067 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
1068 /* there are pointers */ \
1069 void **_objptr = (void**)(obj); \
1070 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
1071 int bwords = (*bitmap_data) - 1; \
1072 void **start_run = _objptr; \
1075 MonoObject *myobj = (MonoObject*)obj; \
1076 g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1078 while (bwords-- > 0) { \
1079 gsize _bmap = *bitmap_data++; \
1080 _objptr = start_run; \
1081 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
1083 if ((_bmap & 1)) { \
1084 HANDLE_PTR (_objptr, (obj)); \
1089 start_run += GC_BITS_PER_WORD; \
1093 /* this one is untested */
1094 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
1095 /* there are pointers */ \
1096 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
1097 int mbwords = (*mbitmap_data++) - 1; \
1098 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
1099 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1100 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1102 MonoObject *myobj = (MonoObject*)start; \
1103 g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1105 while (e_start < e_end) { \
1106 void **_objptr = (void**)e_start; \
1107 gsize *bitmap_data = mbitmap_data; \
1108 unsigned int bwords = mbwords; \
1109 while (bwords-- > 0) { \
1110 gsize _bmap = *bitmap_data++; \
1111 void **start_run = _objptr; \
1112 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
1114 if ((_bmap & 1)) { \
1115 HANDLE_PTR (_objptr, (obj)); \
1120 _objptr = start_run + GC_BITS_PER_WORD; \
1122 e_start += el_size; \
1126 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
1127 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
1128 if ((vt)->desc & 0xffffc000) { \
1129 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
1130 /* there are pointers */ \
1131 int etype = (vt)->desc & 0xc000; \
1132 if (etype == (DESC_TYPE_V_REFS << 14)) { \
1133 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
1134 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
1135 /* Note: this code can handle also arrays of struct with only references in them */ \
1136 while (p < end_refs) { \
1137 HANDLE_PTR (p, (obj)); \
1140 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
1141 int offset = ((vt)->desc >> 16) & 0xff; \
1142 int num_refs = ((vt)->desc >> 24) & 0xff; \
1143 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1144 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1145 while (e_start < e_end) { \
1146 void **p = (void**)e_start; \
1149 for (i = 0; i < num_refs; ++i) { \
1150 HANDLE_PTR (p + i, (obj)); \
1152 e_start += el_size; \
1154 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
1155 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1156 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1157 while (e_start < e_end) { \
1158 void **p = (void**)e_start; \
1159 gsize _bmap = (vt)->desc >> 16; \
1160 /* Note: there is no object header here to skip */ \
1162 if ((_bmap & 1)) { \
1163 HANDLE_PTR (p, (obj)); \
1168 e_start += el_size; \
1174 static mword new_obj_references = 0;
1175 static mword obj_references_checked = 0;
1178 #define HANDLE_PTR(ptr,obj) do { \
1179 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1180 new_obj_references++; \
1181 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1183 obj_references_checked++; \
1188 * ######################################################################
1189 * ######## Detecting and removing garbage.
1190 * ######################################################################
1191 * This section of code deals with detecting the objects no longer in use
1192 * and reclaiming the memory.
1194 static void __attribute__((noinline))
1195 scan_area (char *start, char *end)
1200 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1202 new_obj_references = 0;
1203 obj_references_checked = 0;
1204 while (start < end) {
1205 if (!*(void**)start) {
1206 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1209 vt = (GCVTable*)LOAD_VTABLE (start);
1210 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1212 MonoObject *obj = (MonoObject*)start;
1213 g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1217 if (type == DESC_TYPE_STRING) {
1218 STRING_SIZE (skip_size, start);
1222 } else if (type == DESC_TYPE_RUN_LENGTH) {
1223 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1224 g_assert (skip_size);
1225 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1229 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1230 skip_size = safe_object_get_size ((MonoObject*)start);
1231 skip_size += (ALLOC_ALIGN - 1);
1232 skip_size &= ~(ALLOC_ALIGN - 1);
1233 OBJ_VECTOR_FOREACH_PTR (vt, start);
1234 if (type == DESC_TYPE_ARRAY) {
1235 /* account for the bounds */
1240 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1241 OBJ_BITMAP_SIZE (skip_size, desc, start);
1242 g_assert (skip_size);
1243 OBJ_BITMAP_FOREACH_PTR (desc,start);
1247 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1248 skip_size = safe_object_get_size ((MonoObject*)start);
1249 skip_size += (ALLOC_ALIGN - 1);
1250 skip_size &= ~(ALLOC_ALIGN - 1);
1251 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1255 } else if (type == DESC_TYPE_COMPLEX) {
1256 /* this is a complex object */
1257 skip_size = safe_object_get_size ((MonoObject*)start);
1258 skip_size += (ALLOC_ALIGN - 1);
1259 skip_size &= ~(ALLOC_ALIGN - 1);
1260 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1264 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1265 /* this is an array of complex structs */
1266 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1267 skip_size *= mono_array_length ((MonoArray*)start);
1268 skip_size += sizeof (MonoArray);
1269 skip_size += (ALLOC_ALIGN - 1);
1270 skip_size &= ~(ALLOC_ALIGN - 1);
1271 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1272 if (type == DESC_TYPE_ARRAY) {
1273 /* account for the bounds */
1282 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1283 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1284 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
1288 need_remove_object_for_domain (char *start, MonoDomain *domain)
1290 GCVTable *vt = (GCVTable*)LOAD_VTABLE (start);
1291 /* handle threads someway (maybe insert the root domain vtable?) */
1292 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1293 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1300 process_object_for_domain_clearing (char *start, MonoDomain *domain)
1302 GCVTable *vt = (GCVTable*)LOAD_VTABLE (start);
1303 /* The object could be a proxy for an object in the domain
1305 if (mono_class_has_parent (vt->klass, mono_defaults.real_proxy_class)) {
1306 MonoObject *server = ((MonoRealProxy*)start)->unwrapped_server;
1308 /* The server could already have been zeroed out, so
1309 we need to check for that, too. */
1310 if (server && (!LOAD_VTABLE (server) || mono_object_domain (server) == domain)) {
1311 DEBUG (1, fprintf (gc_debug_file, "Cleaning up remote pointer in %p to object %p (%s)\n",
1312 start, server, LOAD_VTABLE (server) ? safe_name (server) : "null"));
1313 ((MonoRealProxy*)start)->unwrapped_server = NULL;
1318 static void __attribute__((noinline))
1319 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1327 while (start < end) {
1328 if (!*(void**)start) {
1329 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1332 vt = (GCVTable*)LOAD_VTABLE (start);
1333 process_object_for_domain_clearing (start, domain);
1334 remove = need_remove_object_for_domain (start, domain);
1337 if (type == DESC_TYPE_STRING) {
1338 STRING_SIZE (skip_size, start);
1339 if (remove) memset (start, 0, skip_size);
1342 } else if (type == DESC_TYPE_RUN_LENGTH) {
1343 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1344 g_assert (skip_size);
1345 if (remove) memset (start, 0, skip_size);
1348 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1349 skip_size = safe_object_get_size ((MonoObject*)start);
1350 skip_size += (ALLOC_ALIGN - 1);
1351 skip_size &= ~(ALLOC_ALIGN - 1);
1352 if (type == DESC_TYPE_ARRAY) {
1353 /* account for the bounds */
1355 if (remove) memset (start, 0, skip_size);
1358 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1359 OBJ_BITMAP_SIZE (skip_size, desc, start);
1360 g_assert (skip_size);
1361 if (remove) memset (start, 0, skip_size);
1364 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1365 skip_size = safe_object_get_size ((MonoObject*)start);
1366 skip_size += (ALLOC_ALIGN - 1);
1367 skip_size &= ~(ALLOC_ALIGN - 1);
1368 if (remove) memset (start, 0, skip_size);
1371 } else if (type == DESC_TYPE_COMPLEX) {
1372 /* this is a complex object */
1373 skip_size = safe_object_get_size ((MonoObject*)start);
1374 skip_size += (ALLOC_ALIGN - 1);
1375 skip_size &= ~(ALLOC_ALIGN - 1);
1376 if (remove) memset (start, 0, skip_size);
1379 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1380 /* this is an array of complex structs */
1381 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1382 skip_size *= mono_array_length ((MonoArray*)start);
1383 skip_size += sizeof (MonoArray);
1384 skip_size += (ALLOC_ALIGN - 1);
1385 skip_size &= ~(ALLOC_ALIGN - 1);
1386 if (type == DESC_TYPE_ARRAY) {
1387 /* account for the bounds */
1389 if (remove) memset (start, 0, skip_size);
1399 clear_domain_process_pinned_object_callback (PinnedChunk *chunk, char *obj, size_t size, MonoDomain *domain)
1401 process_object_for_domain_clearing (obj, domain);
1405 clear_domain_free_pinned_object_callback (PinnedChunk *chunk, char *obj, size_t size, MonoDomain *domain)
1407 if (need_remove_object_for_domain (obj, domain))
1408 free_pinned_object (chunk, obj, size);
1412 * When appdomains are unloaded we can easily remove objects that have finalizers,
1413 * but all the others could still be present in random places on the heap.
1414 * We need a sweep to get rid of them even though it's going to be costly
1416 * The reason we need to remove them is because we access the vtable and class
1417 * structures to know the object size and the reference bitmap: once the domain is
1418 * unloaded they point to random memory.
1421 mono_gc_clear_domain (MonoDomain * domain)
1423 GCMemSection *section;
1424 LOSObject *bigobj, *prev;
1428 /* Clear all remaining nursery fragments */
1429 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
1430 g_assert (nursery_next <= nursery_frag_real_end);
1431 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
1432 for (frag = nursery_fragments; frag; frag = frag->next) {
1433 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
1437 null_links_for_domain (domain);
1439 for (section = section_list; section; section = section->next) {
1440 scan_area_for_domain (domain, section->data, section->end_data);
1443 /* We need two passes over pinned and large objects because
1444 freeing such an object gives its memory back to the OS (in
1445 the case of large objects) or obliterates its vtable
1446 (pinned objects), but we might need to dereference a
1447 pointer from an object to another object if the first
1448 object is a proxy. */
1449 scan_pinned_objects (clear_domain_process_pinned_object_callback, domain);
1450 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next)
1451 process_object_for_domain_clearing (bigobj->data, domain);
1454 for (bigobj = los_object_list; bigobj;) {
1455 if (need_remove_object_for_domain (bigobj->data, domain)) {
1456 LOSObject *to_free = bigobj;
1458 prev->next = bigobj->next;
1460 los_object_list = bigobj->next;
1461 bigobj = bigobj->next;
1462 DEBUG (1, fprintf (gc_debug_file, "Freeing large object %p (%s)\n",
1463 bigobj->data, safe_name (bigobj->data)));
1464 free_large_object (to_free);
1468 bigobj = bigobj->next;
1470 scan_pinned_objects (clear_domain_free_pinned_object_callback, domain);
1476 * add_to_global_remset:
1478 * The global remset contains locations which point into newspace after
1479 * a minor collection. This can happen if the objects they point to are pinned.
1482 add_to_global_remset (gpointer ptr, gboolean root)
1486 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1489 * FIXME: If an object remains pinned, we need to add it at every minor collection.
1490 * To avoid uncontrolled growth of the global remset, only add each pointer once.
1492 if (global_remset->store_next + 3 < global_remset->end_set) {
1494 *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER;
1495 *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION;
1497 *(global_remset->store_next++) = (mword)ptr;
1501 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1502 rs->next = global_remset;
1505 *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER;
1506 *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION;
1508 *(global_remset->store_next++) = (mword)ptr;
1512 int global_rs_size = 0;
1514 for (rs = global_remset; rs; rs = rs->next) {
1515 global_rs_size += rs->store_next - rs->data;
1517 DEBUG (4, fprintf (gc_debug_file, "Global remset now has size %d\n", global_rs_size));
1522 * This is how the copying happens from the nursery to the old generation.
1523 * We assume that at this time all the pinned objects have been identified and
1525 * We run scan_object() for each pinned object so that each referenced
1526 * objects if possible are copied. The new gray objects created can have
1527 * scan_object() run on them right away, too.
1528 * Then we run copy_object() for the precisely tracked roots. At this point
1529 * all the roots are either gray or black. We run scan_object() on the gray
1530 * objects until no more gray objects are created.
1531 * At the end of the process we walk again the pinned list and we unmark
1532 * the pinned flag. As we go we also create the list of free space for use
1533 * in the next allocation runs.
1535 * We need to remember objects from the old generation that point to the new one
1536 * (or just addresses?).
1538 * copy_object could be made into a macro once debugged (use inline for now).
1541 static char* __attribute__((noinline))
1542 copy_object (char *obj, char *from_space_start, char *from_space_end)
1544 static void *copy_labels [] = { &&LAB_0, &&LAB_1, &&LAB_2, &&LAB_3, &&LAB_4, &&LAB_5, &&LAB_6, &&LAB_7, &&LAB_8 };
1547 * FIXME: The second set of checks is only needed if we are called for tospace
1550 if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) {
1554 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1555 if ((forwarded = object_is_forwarded (obj))) {
1556 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1557 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1560 if (object_is_pinned (obj)) {
1561 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1562 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1565 objsize = safe_object_get_size ((MonoObject*)obj);
1566 objsize += ALLOC_ALIGN - 1;
1567 objsize &= ~(ALLOC_ALIGN - 1);
1568 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1569 /* FIXME: handle pinned allocs:
1570 * Large objects are simple, at least until we always follow the rule:
1571 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1572 * At the end of major collections, we walk the los list and if
1573 * the object is pinned, it is marked, otherwise it can be freed.
1575 if (G_UNLIKELY (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj)))) {
1576 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize));
1580 /* ok, the object is not pinned, we can move it */
1581 /* use a optimized memcpy here */
1582 if (objsize <= sizeof (gpointer) * 8) {
1583 mword *dest = (mword*)gray_objects;
1584 goto *copy_labels [objsize / sizeof (gpointer)];
1586 (dest) [7] = ((mword*)obj) [7];
1588 (dest) [6] = ((mword*)obj) [6];
1590 (dest) [5] = ((mword*)obj) [5];
1592 (dest) [4] = ((mword*)obj) [4];
1594 (dest) [3] = ((mword*)obj) [3];
1596 (dest) [2] = ((mword*)obj) [2];
1598 (dest) [1] = ((mword*)obj) [1];
1600 (dest) [0] = ((mword*)obj) [0];
1608 char* edi = gray_objects;
1609 __asm__ __volatile__(
1611 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1612 : "0" (objsize/4), "1" (edi),"2" (esi)
1617 memcpy (gray_objects, obj, objsize);
1620 /* adjust array->bounds */
1621 vt = ((MonoObject*)obj)->vtable;
1622 g_assert (vt->gc_descr);
1623 if (G_UNLIKELY (vt->rank && ((MonoArray*)obj)->bounds)) {
1624 MonoArray *array = (MonoArray*)gray_objects;
1625 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1626 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1628 /* set the forwarding pointer */
1629 forward_object (obj, gray_objects);
1631 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1632 gray_objects += objsize;
1633 DEBUG (8, g_assert (gray_objects <= to_space_end));
1640 #define HANDLE_PTR(ptr,obj) do { \
1641 void *__old = *(ptr); \
1643 *(ptr) = copy_object (__old, from_start, from_end); \
1644 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1645 if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) && !ptr_in_nursery (ptr)) \
1646 add_to_global_remset ((ptr), FALSE); \
1651 * Scan the object pointed to by @start for references to
1652 * other objects between @from_start and @from_end and copy
1653 * them to the gray_objects area.
1654 * Returns a pointer to the end of the object.
1657 scan_object (char *start, char* from_start, char* from_end)
1663 vt = (GCVTable*)LOAD_VTABLE (start);
1664 //type = vt->desc & 0x7;
1666 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1668 switch (desc & 0x7) {
1669 //if (type == DESC_TYPE_STRING) {
1670 case DESC_TYPE_STRING:
1671 STRING_SIZE (skip_size, start);
1672 return start + skip_size;
1673 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1674 case DESC_TYPE_RUN_LENGTH:
1675 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1676 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1677 g_assert (skip_size);
1678 return start + skip_size;
1679 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1680 case DESC_TYPE_ARRAY:
1681 case DESC_TYPE_VECTOR:
1682 OBJ_VECTOR_FOREACH_PTR (vt, start);
1683 skip_size = safe_object_get_size ((MonoObject*)start);
1685 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1686 skip_size *= mono_array_length ((MonoArray*)start);
1687 skip_size += sizeof (MonoArray);
1689 skip_size += (ALLOC_ALIGN - 1);
1690 skip_size &= ~(ALLOC_ALIGN - 1);
1691 return start + skip_size;
1692 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1693 case DESC_TYPE_SMALL_BITMAP:
1694 OBJ_BITMAP_FOREACH_PTR (desc,start);
1695 OBJ_BITMAP_SIZE (skip_size, desc, start);
1696 return start + skip_size;
1697 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1698 case DESC_TYPE_LARGE_BITMAP:
1699 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1700 skip_size = safe_object_get_size ((MonoObject*)start);
1701 skip_size += (ALLOC_ALIGN - 1);
1702 skip_size &= ~(ALLOC_ALIGN - 1);
1703 return start + skip_size;
1704 //} else if (type == DESC_TYPE_COMPLEX) {
1705 case DESC_TYPE_COMPLEX:
1706 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1707 /* this is a complex object */
1708 skip_size = safe_object_get_size ((MonoObject*)start);
1709 skip_size += (ALLOC_ALIGN - 1);
1710 skip_size &= ~(ALLOC_ALIGN - 1);
1711 return start + skip_size;
1712 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1713 case DESC_TYPE_COMPLEX_ARR:
1714 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1715 /* this is an array of complex structs */
1716 skip_size = safe_object_get_size ((MonoObject*)start);
1718 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1719 skip_size *= mono_array_length ((MonoArray*)start);
1720 skip_size += sizeof (MonoArray);
1722 skip_size += (ALLOC_ALIGN - 1);
1723 skip_size &= ~(ALLOC_ALIGN - 1);
1724 return start + skip_size;
1726 g_assert_not_reached ();
1733 * Scan objects in the gray stack until the stack is empty. This should be called
1734 * frequently after each object is copied, to achieve better locality and cache
1738 drain_gray_stack (char *start_addr, char *end_addr)
1740 char *gray_start = gray_first;
1742 while (gray_start < gray_objects) {
1743 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1744 gray_start = scan_object (gray_start, start_addr, end_addr);
1747 gray_first = gray_start;
1753 * Scan the valuetype pointed to by START, described by DESC for references to
1754 * other objects between @from_start and @from_end and copy them to the gray_objects area.
1755 * Returns a pointer to the end of the object.
1758 scan_vtype (char *start, mword desc, char* from_start, char* from_end)
1762 /* The descriptors include info about the MonoObject header as well */
1763 start -= sizeof (MonoObject);
1765 switch (desc & 0x7) {
1766 case DESC_TYPE_RUN_LENGTH:
1767 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1768 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1769 g_assert (skip_size);
1770 return start + skip_size;
1771 case DESC_TYPE_SMALL_BITMAP:
1772 OBJ_BITMAP_FOREACH_PTR (desc,start);
1773 OBJ_BITMAP_SIZE (skip_size, desc, start);
1774 return start + skip_size;
1775 case DESC_TYPE_LARGE_BITMAP:
1776 case DESC_TYPE_COMPLEX:
1778 g_assert_not_reached ();
1781 // The other descriptors can't happen with vtypes
1782 g_assert_not_reached ();
1789 * Addresses from start to end are already sorted. This function finds the object header
1790 * for each address and pins the object. The addresses must be inside the passed section.
1791 * Return the number of pinned objects.
1794 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1799 void *last_obj = NULL;
1800 size_t last_obj_size = 0;
1803 void **definitely_pinned = start;
1804 while (start < end) {
1806 /* the range check should be redundant */
1807 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1808 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1809 /* multiple pointers to the same object */
1810 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
1814 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1815 search_start = (void*)section->scan_starts [idx];
1816 if (!search_start || search_start > addr) {
1819 search_start = section->scan_starts [idx];
1820 if (search_start && search_start <= addr)
1823 if (!search_start || search_start > addr)
1824 search_start = start_nursery;
1826 if (search_start < last_obj)
1827 search_start = (char*)last_obj + last_obj_size;
1828 /* now addr should be in an object a short distance from search_start
1829 * Note that search_start must point to zeroed mem or point to an object.
1832 if (!*(void**)search_start) {
1833 mword p = (mword)search_start;
1834 p += sizeof (gpointer);
1835 p += ALLOC_ALIGN - 1;
1836 p &= ~(ALLOC_ALIGN - 1);
1837 search_start = (void*)p;
1840 last_obj = search_start;
1841 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1842 last_obj_size += ALLOC_ALIGN - 1;
1843 last_obj_size &= ~(ALLOC_ALIGN - 1);
1844 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size));
1845 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1846 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1847 pin_object (search_start);
1848 definitely_pinned [count] = search_start;
1852 /* skip to the next object */
1853 search_start = (void*)((char*)search_start + last_obj_size);
1854 } while (search_start <= addr);
1855 /* we either pinned the correct object or we ignored the addr because
1856 * it points to unused zeroed memory.
1862 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
// Growable array of candidate pinning addresses shared by the pinning code below.
1866 static void** pin_queue;
// Capacity of pin_queue in slots (set by realloc_pin_queue).
1867 static int pin_queue_size = 0;
// Index of the next free slot; entries are appended with pin_queue [next_pin_slot++].
1868 static int next_pin_slot = 0;
// NOTE(review): presumably the body of new_gap (), the helper called from
// sort_addresses; its signature and remaining statements are not visible here.
// Shrink the gap by a factor of 10/13 using integer arithmetic.
1873 gap = (gap * 10) / 13;
// Gaps of 9 and 10 are special-cased; the statement guarded by this test is not visible.
1874 if (gap == 9 || gap == 10)
// Comparison callback with the qsort () parameter shape: a and b each point to a
// stored address. In the visible code it is only referenced from the disabled
// qsort call quoted inside sort_addresses.
// NOTE(review): this subtracts two void pointers, which relies on a compiler
// extension, and the pointer difference is presumably narrowed to the return
// type (not visible here) - confirm the sign survives on 64-bit targets.
1883 compare_addr (const void *a, const void *b)
1885 return *(const void **)a - *(const void **)b;
1889 /* sort the addresses in array in increasing order */
// Sorts array [0 .. size) in place so that addresses end up in increasing order.
// NOTE(review): the shrinking gap from new_gap () together with the swapped flag
// suggests a comb sort - confirm; several lines of the loop are not visible here.
1891 sort_addresses (void **array, int size)
1894 * qsort is slower as predicted.
1895 * qsort (array, size, sizeof (gpointer), compare_addr);
1902 gap = new_gap (gap);
1905 for (i = 0; i < end; i++) {
// Swap the pair when the element at the lower index holds the larger address.
1907 if (array [i] > array [j]) {
1908 void* val = array [i];
1909 array [i] = array [j];
// Presumably the exit test: gap is down to 1 and the last pass swapped nothing.
1914 if (gap == 1 && !swapped)
// Debug helper (marked G_GNUC_UNUSED): for every pin_queue entry it writes to
// gc_debug_file the range from the previous position to that entry and its size
// in bytes, starting at start_nursery, then prints one more range after the loop.
// NOTE(review): the updates of first and next between the prints are not visible;
// presumably the final range ends at end_nursery.
1919 static G_GNUC_UNUSED void
1920 print_nursery_gaps (void* start_nursery, void *end_nursery)
1923 gpointer first = start_nursery;
1925 for (i = 0; i < next_pin_slot; ++i) {
1926 next = pin_queue [i];
1927 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1931 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1934 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
// Sorts pin_queue [start_slot .. next_pin_slot) when it holds more than one entry,
// then walks the range with a read cursor (cur) and a write cursor (start) and
// stores the resulting length back into next_pin_slot.
// start_slot: index of the first queue entry to process; entries below it are not touched.
1936 optimize_pin_queue (int start_slot)
1938 void **start, **cur, **end;
1939 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1940 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1941 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1942 if ((next_pin_slot - start_slot) > 1)
1943 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1944 start = cur = pin_queue + start_slot;
1945 end = pin_queue + next_pin_slot;
// Skip over entries equal to the one just kept.
// NOTE(review): *cur is read before cur < end is tested, so when cur == end this
// reads the slot one past the queued entries (possibly past the allocation when
// the queue is full); testing cur < end first would avoid the read.
1948 while (*start == *cur && cur < end)
// The write cursor now marks the new logical end of the queue.
1952 next_pin_slot = start - pin_queue;
1953 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1954 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
// Grows pin_queue. The new capacity is 1024 slots when pin_queue_size is 0 and
// otherwise the current capacity plus half of it. The first next_pin_slot entries
// are copied into the new array and the old array goes to free_internal_mem ().
// NOTE(review): on the first call pin_queue is still a null pointer (static storage),
// so memcpy (with length 0) and free_internal_mem both receive null - confirm both
// tolerate that. The result of get_internal_mem () is used without a check.
1959 realloc_pin_queue (void)
1961 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1962 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1963 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1964 free_internal_mem (pin_queue);
1965 pin_queue = new_pin;
1966 pin_queue_size = new_size;
1967 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1971 * Scan the memory between start and end and queue values which could be pointers
1972 * to the area between start_nursery and end_nursery for later consideration.
1973 * Typically used for thread stacks.
// Treats every word in [start, end) as a possible pointer. Values that fall inside
// [start_nursery, end_nursery) are rounded down to a multiple of ALLOC_ALIGN and
// appended to pin_queue, which is grown first when it is full.
// NOTE(review): count appears in the DEBUG lines but its declaration and updates
// are not among the visible lines, and neither is the increment of start.
1976 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1979 while (start < end) {
1980 if (*start >= start_nursery && *start < end_nursery) {
1982 * *start can point to the middle of an object
1983 * note: should we handle pointing at the end of an object?
1984 * pinning in C# code disallows pointing at the end of an object
1985 * but there is some small chance that an optimizing C compiler
1986 * may keep the only reference to an object by pointing
1987 * at the end of it. We ignore this small chance for now.
1988 * Pointers to the end of an object are indistinguishable
1989 * from pointers to the start of the next object in memory
1990 * so if we allow that we'd need to pin two objects...
1991 * We queue the pointer in an array, the
1992 * array will then be sorted and uniqued. This way
1993 * we can coalesce several pinning pointers and it should
1994 * be faster since we'd do a memory scan with increasing
1995 * addresses. Note: we can align the address to the allocation
1996 * alignment, so the unique process is more effective.
// Clear the low bits so that pointers into the same aligned slot compare equal later.
1998 mword addr = (mword)*start;
1999 addr &= ~(ALLOC_ALIGN - 1);
// Make room before appending.
2000 if (next_pin_slot >= pin_queue_size)
2001 realloc_pin_queue ();
2002 pin_queue [next_pin_slot++] = (void*)addr;
2003 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
2008 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
2010 #ifdef HAVE_VALGRIND_MEMCHECK_H
2012 * The pinning addresses might come from undefined memory, this is normal. Since they
2013 * are used in lots of functions, we make the memory defined here instead of having
2014 * to add a supression for those functions.
// Marks the used part of pin_queue as defined for Valgrind.
2016 VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0]));
2021 * If generation is 0, just mark objects in the nursery, the others we don't care,
2022 * since they are not going to move anyway.
2023 * There are different areas that are scanned for pinned pointers:
2024 * *) the thread stacks (when jit support is ready only the unmanaged frames)
2025 * *) the pinned handle table
2026 * *) the pinned roots
2028 * Note: when we'll use a write barrier for old to new gen references, we need to
2029 * keep track of old gen objects that point to pinned new gen objects because in that
2030 * case the referenced object will be moved maybe at the next collection, but there
2031 * is no write in the old generation area where the pinned object is referenced
2032 * and we may not consider it as reachable.
// NOTE(review): only the signature is visible in this view; the behaviour described
// in the preceding comment cannot be confirmed from here. Marked G_GNUC_UNUSED.
2034 static G_GNUC_UNUSED void
2035 mark_pinned_objects (int generation)
2040 * Debugging function: find in the conservative roots where @obj is being pinned.
// Debug helper (marked G_GNUC_UNUSED): scans the root records in roots_hash [0] that
// have no descriptor, logs every word whose value lies inside [obj, obj + size),
// and then passes the same object and size to find_pinning_ref_from_thread ().
// NOTE(review): the table is selected with the literal index 0 while pin_from_roots
// uses ROOT_TYPE_PINNED, and the log text says "pinned roots" - confirm that 0 is
// the intended root type.
2042 static G_GNUC_UNUSED void
2043 find_pinning_reference (char *obj, size_t size)
// One past the last byte of the object.
2047 char *endobj = obj + size;
2048 for (i = 0; i < roots_hash_size [0]; ++i) {
2049 for (root = roots_hash [0][i]; root; root = root->next) {
2050 /* if desc is non-null it has precise info */
2051 if (!root->root_desc) {
2052 char ** start = (char**)root->start_root;
2053 while (start < (char**)root->end_root) {
2054 if (*start >= obj && *start < endobj) {
2055 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
2062 find_pinning_ref_from_thread (obj, size);
2066 * The first thing we do in a collection is to identify pinned objects.
2067 * This function considers all the areas of memory that need to be
2068 * conservatively scanned.
// Queues candidate pinning addresses that point into [start_nursery, end_nursery):
// first from every root record in the ROOT_TYPE_PINNED table, through
// conservatively_pin_objects_from (), then from the threads through
// scan_thread_data () called with FALSE as its last argument.
2071 pin_from_roots (void *start_nursery, void *end_nursery)
2075 DEBUG (2, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d/%d entries)\n", (int)roots_size, num_roots_entries [ROOT_TYPE_NORMAL], num_roots_entries [ROOT_TYPE_PINNED]));
2076 /* objects pinned from the API are inside these roots */
2077 for (i = 0; i < roots_hash_size [ROOT_TYPE_PINNED]; ++i) {
2078 for (root = roots_hash [ROOT_TYPE_PINNED][i]; root; root = root->next) {
2079 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
2080 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
2083 /* now deal with the thread stacks
2084 * in the future we should be able to conservatively scan only:
2085 * *) the cpu registers
2086 * *) the unmanaged stack frames
2087 * *) the _last_ managed stack frame
2088 * *) pointers slots in managed frames
2090 scan_thread_data (start_nursery, end_nursery, FALSE);
2093 /* Copy function called from user defined mark functions */
// Range bounds handed to copy_object () by user_copy (); precisely_scan_objects_from ()
// assigns both right before it invokes a user supplied marker.
2094 static char *user_copy_n_start;
2095 static char *user_copy_n_end;
// Callback given to user mark functions: copies addr using the saved bounds and
// returns whatever copy_object () returns.
2098 user_copy (void *addr)
2101 return copy_object (addr, user_copy_n_start, user_copy_n_end);
2107 * The memory area from start_root to end_root contains pointers to objects.
2108 * Their position is precisely described by @desc (this means that the pointer
2109 * can be either NULL or the pointer to the start of an object).
2110 * This function copies the referenced objects to to_space and updates the pointers.
// Dispatches on the type bits of desc (desc & ROOT_DESC_TYPE_MASK):
//  - ROOT_DESC_BITMAP: the bits above the type field are the bitmap itself; a slot
//    whose bit is set and whose value is non-null is replaced by copy_object ().
//  - ROOT_DESC_COMPLEX: the bitmap lives in complex_descriptors at the index held
//    in desc; the first word there is read as a count and each following word
//    describes GC_BITS_PER_WORD consecutive slots.
//  - ROOT_DESC_USER: looks up a marker in user_descriptors and calls it with
//    start_root and user_copy.
//  - ROOT_DESC_RUN_LEN and the remaining case: g_assert_not_reached ().
// drain_gray_stack () is called after each slot that was overwritten.
// NOTE(review): the loops that advance start_root, objptr and the bitmap bits are
// not among the visible lines.
2113 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
2115 switch (desc & ROOT_DESC_TYPE_MASK) {
2116 case ROOT_DESC_BITMAP:
2117 desc >>= ROOT_DESC_TYPE_SHIFT;
2119 if ((desc & 1) && *start_root) {
2120 *start_root = copy_object (*start_root, n_start, n_end);
2121 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root));
2122 drain_gray_stack (n_start, n_end);
2128 case ROOT_DESC_COMPLEX: {
2129 gsize *bitmap_data = complex_descriptors + (desc >> ROOT_DESC_TYPE_SHIFT);
// The leading word is a count that includes itself, hence the minus one.
2130 int bwords = (*bitmap_data) - 1;
2131 void **start_run = start_root;
2133 while (bwords-- > 0) {
2134 gsize bmap = *bitmap_data++;
2135 void **objptr = start_run;
2137 if ((bmap & 1) && *objptr) {
2138 *objptr = copy_object (*objptr, n_start, n_end);
2139 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", objptr, *objptr));
2140 drain_gray_stack (n_start, n_end);
// Move on to the slots described by the next bitmap word.
2145 start_run += GC_BITS_PER_WORD;
2149 case ROOT_DESC_USER: {
2150 MonoGCMarkFunc marker = user_descriptors [desc >> ROOT_DESC_TYPE_SHIFT];
// Publish the bounds for user_copy () before handing control to the marker.
2152 user_copy_n_start = n_start;
2153 user_copy_n_end = n_end;
2154 marker (start_root, user_copy);
2157 case ROOT_DESC_RUN_LEN:
2158 g_assert_not_reached ();
2160 g_assert_not_reached ();
// Hands out a Fragment record. The visible lines take the head of fragment_freelist
// and unlink it, and otherwise obtain a new record from get_internal_mem ().
// NOTE(review): the condition choosing between the two paths is not visible;
// presumably the allocation happens only when the freelist is empty.
2165 alloc_fragment (void)
2167 Fragment *frag = fragment_freelist;
2169 fragment_freelist = frag->next;
2173 frag = get_internal_mem (sizeof (Fragment));
2179 * Allocate and setup the data structures needed to be able to allocate objects
2180 * in the nursery. The nursery is stored in nursery_section.
// Sets up the nursery section. The visible steps are:
//  - test whether nursery_section already exists (the guarded statement is not
//    visible, presumably an early return);
//  - obtain memory with get_os_memory (): nursery_size bytes, or twice that under
//    ALIGN_NURSERY so the start can be rounded up to a 1 << DEFAULT_NURSERY_BITS boundary;
//  - fill in the GCMemSection fields, allocate its scan_starts table and push the
//    section on section_list;
//  - prepare one Fragment that spans nursery_start to nursery_real_end.
2183 alloc_nursery (void)
2185 GCMemSection *section;
2191 if (nursery_section)
2193 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size));
2194 /* later we will alloc a larger area for the nursery but only activate
2195 * what we need. The rest will be used as expansion if we have too many pinned
2196 * objects in the existing nursery.
2198 /* FIXME: handle OOM */
2199 section = get_internal_mem (sizeof (GCMemSection));
2201 #ifdef ALIGN_NURSERY
2202 /* Allocate twice the memory to be able to put the nursery at an aligned address */
2203 g_assert (nursery_size == DEFAULT_NURSERY_SIZE);
2205 alloc_size = nursery_size * 2;
2206 data = get_os_memory (alloc_size, TRUE);
// Round data up to the next multiple of 1 << DEFAULT_NURSERY_BITS.
2207 nursery_start = (void*)(((mword)data + (1 << DEFAULT_NURSERY_BITS) - 1) & ~((1 << DEFAULT_NURSERY_BITS) - 1));
2208 g_assert ((char*)nursery_start + nursery_size <= ((char*)data + alloc_size));
2209 /* FIXME: Use the remaining size for something else, if it is big enough */
// Presumably the branch without ALIGN_NURSERY (the preprocessor lines are not visible).
2211 alloc_size = nursery_size;
2212 data = get_os_memory (alloc_size, TRUE);
2213 nursery_start = data;
2215 nursery_real_end = nursery_start + nursery_size;
2216 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
2217 nursery_next = nursery_start;
2218 total_alloc += alloc_size;
2219 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc));
// NOTE(review): section->data is the raw allocation base and section->size the raw
// allocation size, so under ALIGN_NURSERY they can differ from nursery_start and nursery_size.
2220 section->data = section->next_data = data;
2221 section->size = alloc_size;
2222 section->end_data = nursery_real_end;
// One scan_starts slot per SCAN_START_SIZE bytes of the allocation.
2223 scan_starts = alloc_size / SCAN_START_SIZE;
2224 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2225 section->num_scan_start = scan_starts;
2226 section->role = MEMORY_ROLE_GEN0;
2228 /* add to the section list */
2229 section->next = section_list;
2230 section_list = section;
2232 nursery_section = section;
2234 /* Setup the single first large fragment */
2235 frag = alloc_fragment ();
2236 frag->fragment_start = nursery_start;
2237 frag->fragment_limit = nursery_start;
2238 frag->fragment_end = nursery_real_end;
2239 nursery_frag_real_end = nursery_real_end;
2240 /* FIXME: frag here is lost */
// Walks the finalizer entry list and replaces each fin->object with the result of
// copy_object (fin->object, start, end).
// NOTE(review): the lines between the loop header and the DEBUG call are not visible.
2244 scan_finalizer_entries (FinalizeEntry *list, char *start, char *end) {
2247 for (fin = list; fin; fin = fin->next) {
2250 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2251 fin->object = copy_object (fin->object, start, end);
2256 * Update roots in the old generation. Since we currently don't have the
2257 * info from the write barriers, we just scan all the objects.
// Marked G_GNUC_UNUSED; the only visible call, in finish_gray_stack, is commented out.
// Runs scan_object () over the objects of every section in section_list, then over
// each large object in los_object_list, and finally passes fin_ready_list and
// critical_fin_list to scan_finalizer_entries ().
// NOTE(review): the statement guarded by the nursery_section test is not visible;
// presumably the nursery is skipped.
2259 static G_GNUC_UNUSED void
2260 scan_old_generation (char *start, char* end)
2262 GCMemSection *section;
2263 LOSObject *big_object;
2266 for (section = section_list; section; section = section->next) {
2267 if (section == nursery_section)
2269 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
2270 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
2272 while (p < section->next_data) {
2277 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
// scan_object () returns the position to continue from.
2278 p = scan_object (p, start, end);
2281 /* scan the old object space, too */
2282 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2283 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2284 scan_object (big_object->data, start, end);
2286 /* scan the list of objects ready for finalization */
2287 scan_finalizer_entries (fin_ready_list, start, end);
2288 scan_finalizer_entries (critical_fin_list, start, end);
// Running total of the bytes that add_nursery_frag () has put on nursery_fragments.
2291 static mword fragment_total = 0;
2293 * We found a fragment of free memory in the nursery: memzero it and if
2294 * it is big enough, add it to the list of fragments that can be used for
// frag_size is the byte length of [frag_start, frag_end).
// Under CLEAR_AT_GC the range is zeroed first. A range of at least FRAGMENT_MIN_SIZE
// bytes gets a Fragment record pushed on the front of nursery_fragments.
// NOTE(review): the final memset is presumably the else branch for ranges that are
// too small; the else line itself is not visible.
2298 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
2301 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
2302 /* memsetting just the first chunk start is bound to provide better cache locality */
2303 if (nursery_clear_policy == CLEAR_AT_GC)
2304 memset (frag_start, 0, frag_size);
2305 /* Not worth dealing with smaller fragments: need to tune */
2306 if (frag_size >= FRAGMENT_MIN_SIZE) {
2307 fragment = alloc_fragment ();
2308 fragment->fragment_start = frag_start;
2309 fragment->fragment_limit = frag_start;
2310 fragment->fragment_end = frag_end;
2311 fragment->next = nursery_fragments;
2312 nursery_fragments = fragment;
2313 fragment_total += frag_size;
2315 /* Clear unused fragments, pinning depends on this */
2316 memset (frag_start, 0, frag_size);
// Scans with scan_object () every entry of los_object_list that is pinned and not
// yet flagged as scanned, then sets its scanned flag.
// finish_gray_stack () stores the result in bigo_scanned_num and keeps looping while
// it is non-zero; the counting and the return statement are not visible here.
2321 scan_needed_big_objects (char *start_addr, char *end_addr)
2323 LOSObject *big_object;
2325 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2326 if (!big_object->scanned && object_is_pinned (big_object->data)) {
2327 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2328 scan_object (big_object->data, start_addr, end_addr);
2329 big_object->scanned = TRUE;
// Completes the copying phase for the range [start_addr, end_addr):
//  1. scans every object between gray_first and gray_objects with scan_object ();
//  2. repeats finalize_in_range () and scan_needed_big_objects (), draining the
//     newly added gray objects each time, until num_ready_finalizers stops
//     changing and no big object was scanned in the round;
//  3. stores the final gray_start into to_space and to_space_section->next_data;
//  4. calls null_link_in_range () for the disappearing links.
// NOTE(review): atv and btv are used in the last DEBUG line but their declarations
// and the calls that set them are not among the visible lines.
2337 finish_gray_stack (char *start_addr, char *end_addr)
2341 int fin_ready, bigo_scanned_num;
2345 * We copied all the reachable objects. Now it's the time to copy
2346 * the objects that were not referenced by the roots, but by the copied objects.
2347 * we built a stack of objects pointed to by gray_start: they are
2348 * additional roots and we may add more items as we go.
2349 * We loop until gray_start == gray_objects which means no more objects have
2350 * been added. Note this is iterative: no recursion is involved.
2351 * We need to walk the LO list as well in search of marked big objects
2352 * (use a flag since this is needed only on major collections). We need to loop
2353 * here as well, so keep a counter of marked LO (increasing it in copy_object).
2354 * To achieve better cache locality and cache usage, we drain the gray stack
2355 * frequently, after each object is copied, and just finish the work here.
2357 gray_start = gray_first;
// scan_object () returns the position of the next object to scan.
2358 while (gray_start < gray_objects) {
2359 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2360 gray_start = scan_object (gray_start, start_addr, end_addr);
2363 //scan_old_generation (start_addr, end_addr);
2364 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
2365 /* walk the finalization queue and move also the objects that need to be
2366 * finalized: use the finalized objects as new roots so the objects they depend
2367 * on are also not reclaimed. As with the roots above, only objects in the nursery
2368 * are marked/copied.
2369 * We need a loop here, since objects ready for finalizers may reference other objects
2370 * that are fin-ready. Speedup with a flag?
// Snapshot the counter so the loop condition can detect newly readied finalizers.
2373 fin_ready = num_ready_finalizers;
2374 finalize_in_range (start_addr, end_addr);
2375 bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr);
2377 /* drain the new stack that might have been created */
2378 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2379 while (gray_start < gray_objects) {
2380 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2381 gray_start = scan_object (gray_start, start_addr, end_addr);
2383 } while (fin_ready != num_ready_finalizers || bigo_scanned_num);
2385 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
// Everything up to gray_start is now in use in the to-space section.
2386 to_space = gray_start;
2387 to_space_section->next_data = to_space;
2390 * handle disappearing links
2391 * Note we do this after checking the finalization queue because if an object
2392 * survives (at least long enough to be finalized) we don't clear the link.
2393 * This also deals with a possible issue with the monitor reclamation: with the Boehm
2394 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
2397 null_link_in_range (start_addr, end_addr);
2399 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
// Set by collect_nursery () to the pin count of the collection that just finished.
2402 static int last_num_pinned = 0;
// Rebuilds the list of free nursery fragments from pin_queue [start_pin .. end_pin).
// The previous fragments are handed to fragment_freelist, scan_starts is cleared,
// and the space before each pinned object, plus the space from the last one up to
// nursery_real_end, goes to add_nursery_frag (). Each pinned object is unpinned
// and recorded in scan_starts along the way.
// NOTE(review): this relies on the queue entries being in increasing address order,
// presumably guaranteed by optimize_pin_queue () - confirm.
2405 build_nursery_fragments (int start_pin, int end_pin)
2407 char *frag_start, *frag_end;
2411 /* FIXME: handle non-NULL fragment_freelist */
// NOTE(review): this overwrites fragment_freelist, so records already on it are dropped.
2412 fragment_freelist = nursery_fragments;
2413 nursery_fragments = NULL;
2414 frag_start = nursery_start;
2416 /* clear scan starts */
2417 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
2418 for (i = start_pin; i < end_pin; ++i) {
2419 frag_end = pin_queue [i];
2420 /* remove the pin bit from pinned objects */
2421 unpin_object (frag_end);
2422 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
// Free space between the end of the previous pinned object and this one.
2423 frag_size = frag_end - frag_start;
2425 add_nursery_frag (frag_size, frag_start, frag_end);
// The next gap starts after this object, whose size is rounded up to ALLOC_ALIGN.
2426 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2427 frag_size += ALLOC_ALIGN - 1;
2428 frag_size &= ~(ALLOC_ALIGN - 1);
2429 frag_start = (char*)pin_queue [i] + frag_size;
2431 * pin_queue [i] might point to a half-constructed string or vector whose
2432 * length field is not set. In that case, frag_start points inside the
2433 * (zero initialized) object. Find the end of the object by scanning forward.
2436 if (is_maybe_half_constructed (pin_queue [i])) {
2439 /* This is also hit for zero length arrays/strings */
2441 /* Find the end of the TLAB which contained this allocation */
2442 tlab_end = find_tlab_next_from_address (pin_queue [i]);
// Advance over zero words, stopping at the first non-zero word or at tlab_end.
2445 while ((frag_start < tlab_end) && *(mword*)frag_start == 0)
2446 frag_start += sizeof (mword);
2449 * FIXME: The object is either not allocated in a TLAB, or it isn't a
2450 * half constructed object.
// Trailing gap: from the end of the last pinned object to the end of the nursery.
2455 nursery_last_pinned_end = frag_start;
2456 frag_end = nursery_real_end;
2457 frag_size = frag_end - frag_start;
2459 add_nursery_frag (frag_size, frag_start, frag_end);
// No fragment was large enough to be listed: log the pinned objects.
2460 if (!nursery_fragments) {
2461 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
2462 for (i = start_pin; i < end_pin; ++i) {
// NOTE(review): safe_object_get_size () is printed with %d here while its result is
// stored in a size_t elsewhere in this file - confirm the format matches its return type.
2463 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2468 nursery_next = nursery_frag_real_end = NULL;
2470 /* Clear TLABs for all threads */
2474 /* FIXME: later reduce code duplication here with the above
2475 * We don't keep track of section fragments for non-nursery sections yet, so
// Per-section counterpart of build_nursery_fragments (). It clears scan_starts,
// unpins each object in pin_queue [section->pin_queue_start .. section->pin_queue_end),
// zeroes the space before each pinned object and after the last one, and raises
// section->next_data to the end of the highest pinned object.
// No fragment records are created; the gaps are only cleared with memset.
2479 build_section_fragments (GCMemSection *section)
2482 char *frag_start, *frag_end;
2485 /* clear scan starts */
2486 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
2487 frag_start = section->data;
2488 section->next_data = section->data;
2489 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
2490 frag_end = pin_queue [i];
2491 /* remove the pin bit from pinned objects */
// NOTE(review): unpin_object () runs before the range check that follows, so it is
// also applied to an entry that lies at or beyond the end of the section.
2492 unpin_object (frag_end);
2493 if (frag_end >= section->data + section->size) {
2494 frag_end = section->data + section->size;
2496 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
// Zero the gap that precedes this pinned object.
2498 frag_size = frag_end - frag_start;
2500 memset (frag_start, 0, frag_size);
// The next gap starts after this object, whose size is rounded up to ALLOC_ALIGN.
2501 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2502 frag_size += ALLOC_ALIGN - 1;
2503 frag_size &= ~(ALLOC_ALIGN - 1);
2504 frag_start = (char*)pin_queue [i] + frag_size;
2505 section->next_data = MAX (section->next_data, frag_start);
// Zero the tail of the section after the last pinned object.
2507 frag_end = section->end_data;
2508 frag_size = frag_end - frag_start;
2510 memset (frag_start, 0, frag_size);
// Calls precisely_scan_objects_from () for every root record stored under root_type,
// passing the record bounds, the range [addr_start, addr_end) and the record descriptor.
2514 scan_from_registered_roots (char *addr_start, char *addr_end, int root_type)
2518 for (i = 0; i < roots_hash_size [root_type]; ++i) {
2519 for (root = roots_hash [root_type][i]; root; root = root->next) {
2520 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2521 precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc);
2527 * Collect objects in the nursery.
// Minor collection. The visible steps, in order:
//  - raise nursery_next so the whole nursery up to nursery_real_end is considered;
//  - under CLEAR_AT_TLAB_CREATION, zero the unused nursery tail and every remaining fragment;
//  - make sure to_space can hold max_garbage_amount bytes, allocating a new section
//    of four times the nursery section size when it cannot;
//  - gather pinning candidates with pin_from_roots (), sort and reduce them, and
//    pin the objects they point into;
//  - scan the remembered sets, the pinned objects, the normal registered roots,
//    the thread data and the pinned-alloc objects;
//  - run finish_gray_stack () and rebuild the nursery fragments;
//  - notify the finalizer thread when either finalizer list is non-empty.
// requested_size is not referenced in the visible lines.
// NOTE(review): atv, btv, i and frag are used below but their declarations and the
// timing calls that set atv and btv are not among the visible lines.
2530 collect_nursery (size_t requested_size)
2532 GCMemSection *section;
2533 size_t max_garbage_amount;
2535 char *orig_nursery_next;
2537 TV_DECLARE (all_atv);
2538 TV_DECLARE (all_btv);
2543 orig_nursery_next = nursery_next;
// NOTE(review): the second MAX raises nursery_next to at least nursery_real_end, so
// this first MAX presumably has no lasting effect while the FIXME below stands.
2544 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2545 /* FIXME: optimize later to use the higher address where an object can be present */
2546 nursery_next = MAX (nursery_next, nursery_real_end);
2548 if (consistency_check_at_minor_collection)
2549 check_consistency ();
2551 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
// Upper bound on the number of bytes that may have to be copied out of the nursery.
2552 max_garbage_amount = nursery_next - nursery_start;
2554 /* Clear all remaining nursery fragments, pinning depends on this */
2555 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2556 g_assert (orig_nursery_next <= nursery_frag_real_end);
2557 memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next);
2558 for (frag = nursery_fragments; frag; frag = frag->next) {
2559 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2564 * not enough room in the old generation to store all the possible data from
2565 * the nursery in a single continuous space.
2566 * We reset to_space if we allocated objects in degraded mode.
2568 if (to_space_section)
2569 to_space = gray_objects = gray_first = to_space_section->next_data;
2570 if ((to_space_end - to_space) < max_garbage_amount) {
2571 section = alloc_section (nursery_section->size * 4);
2572 g_assert (nursery_section->size >= max_garbage_amount);
2573 to_space = gray_objects = gray_first = section->next_data;
2574 to_space_end = section->end_data;
2575 to_space_section = section;
2577 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2578 nursery_section->next_data = nursery_next;
2581 mono_stats.minor_gc_count ++;
2582 /* world must be stopped already */
2583 TV_GETTIME (all_atv);
2585 /* pin from pinned handles */
2586 pin_from_roots (nursery_start, nursery_next);
2587 /* identify pinned objects */
2588 optimize_pin_queue (0);
// From here on next_pin_slot counts pinned objects rather than candidate addresses.
2589 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2591 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2592 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2595 * walk all the roots and copy the young objects to the old generation,
2596 * starting from to_space
2599 scan_from_remsets (nursery_start, nursery_next);
2600 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2602 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2604 /* the pinned objects are roots */
2605 for (i = 0; i < next_pin_slot; ++i) {
2606 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2607 scan_object (pin_queue [i], nursery_start, nursery_next);
2609 /* registered roots, this includes static fields */
2610 scan_from_registered_roots (nursery_start, nursery_next, ROOT_TYPE_NORMAL);
2611 scan_thread_data (nursery_start, nursery_next, TRUE);
2612 /* alloc_pinned objects */
2613 scan_from_pinned_objects (nursery_start, nursery_next);
2615 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2617 finish_gray_stack (nursery_start, nursery_next);
2619 /* walk the pin_queue, build up the fragment list of free memory, unmark
2620 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2623 build_nursery_fragments (0, next_pin_slot);
2625 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2627 TV_GETTIME (all_btv);
2628 mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2630 /* prepare the pin queue for the next collection */
2631 last_num_pinned = next_pin_slot;
// Wake the finalizer thread when there is work queued for it.
2633 if (fin_ready_list || critical_fin_list) {
2634 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2635 mono_gc_finalize_notify ();
2640 major_collection (void)
2642 GCMemSection *section, *prev_section;
2643 LOSObject *bigobj, *prevbo;
2648 TV_DECLARE (all_atv);
2649 TV_DECLARE (all_btv);
2652 /* FIXME: only use these values for the precise scan
2653 * note that to_space pointers should be excluded anyway...
2655 char *heap_start = NULL;
2656 char *heap_end = (char*)-1;
2657 size_t copy_space_required = 0;
2660 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2662 mono_stats.major_gc_count ++;
2664 /* Clear all remaining nursery fragments, pinning depends on this */
2665 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2666 g_assert (nursery_next <= nursery_frag_real_end);
2667 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
2668 for (frag = nursery_fragments; frag; frag = frag->next) {
2669 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2674 * FIXME: implement Mark/Compact
2675 * Until that is done, we can just apply mostly the same alg as for the nursery:
2676 * this means we need a big section to potentially copy all the other sections, so
2677 * it is not ideal specially with large heaps.
2679 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2680 collect_nursery (0);
2683 TV_GETTIME (all_atv);
2684 /* FIXME: make sure the nursery next_data ptr is updated */
2685 nursery_section->next_data = nursery_real_end;
2686 /* we should also coalesce scanning from sections close to each other
2687 * and deal with pointers outside of the sections later.
2689 /* The remsets are not useful for a major collection */
2691 /* world must be stopped already */
2693 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2694 for (section = section_list; section; section = section->next) {
2695 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2696 pin_from_roots (section->data, section->next_data);
2697 if (count != next_pin_slot) {
2699 optimize_pin_queue (count);
2700 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2701 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2702 section->pin_queue_end = next_pin_slot = count + reduced_to;
2704 copy_space_required += (char*)section->next_data - (char*)section->data;
2706 /* identify possible pointers to the inside of large objects */
2707 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2708 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2709 count = next_pin_slot;
2710 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2711 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2712 if (next_pin_slot != count) {
2713 next_pin_slot = count;
2714 pin_object (bigobj->data);
2715 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2718 /* look for pinned addresses for pinned-alloc objects */
2719 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2720 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2721 count = next_pin_slot;
2722 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2723 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2724 if (next_pin_slot != count) {
2725 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2726 next_pin_slot = count;
2731 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2732 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2734 /* allocate the big to space */
2735 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required));
2736 section = alloc_section (copy_space_required);
2737 to_space = gray_objects = gray_first = section->next_data;
2738 to_space_end = section->end_data;
2739 to_space_section = section;
2741 /* the old generation doesn't need to be scanned (no remembered sets or card
2742 * table needed either): the only objects that must survive are those pinned and
2743 * those referenced by the precise roots.
2744 * mark any section without pinned objects, so we can free it since we will be able to
2745 * move all the objects.
2747 /* the pinned objects are roots (big objects are included in this list, too) */
2748 for (i = 0; i < next_pin_slot; ++i) {
2749 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2750 scan_object (pin_queue [i], heap_start, heap_end);
2752 /* registered roots, this includes static fields */
2753 scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_NORMAL);
2754 scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_WBARRIER);
2756 scan_thread_data (heap_start, heap_end, TRUE);
2757 /* alloc_pinned objects */
2758 scan_from_pinned_objects (heap_start, heap_end);
2759 /* scan the list of objects ready for finalization */
2760 scan_finalizer_entries (fin_ready_list, heap_start, heap_end);
2761 scan_finalizer_entries (critical_fin_list, heap_start, heap_end);
2763 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2765 /* we need to go over the big object list to see if any was marked and scan it
2766 * And we need to make this in a loop, considering that objects referenced by finalizable
2767 * objects could reference big objects (this happens in finish_gray_stack ())
2769 scan_needed_big_objects (heap_start, heap_end);
2770 /* all the objects in the heap */
2771 finish_gray_stack (heap_start, heap_end);
2773 /* sweep the big objects list */
2775 for (bigobj = los_object_list; bigobj;) {
2776 if (object_is_pinned (bigobj->data)) {
2777 unpin_object (bigobj->data);
2778 bigobj->scanned = FALSE;
2781 /* not referenced anywhere, so we can free it */
2783 prevbo->next = bigobj->next;
2785 los_object_list = bigobj->next;
2787 bigobj = bigobj->next;
2788 free_large_object (to_free);
2792 bigobj = bigobj->next;
2794 /* unpin objects from the pinned chunks and free the unmarked ones */
2795 sweep_pinned_objects ();
2797 /* free the unused sections */
2798 prev_section = NULL;
2799 for (section = section_list; section;) {
2800 /* to_space doesn't need handling here and the nursery is special */
2801 if (section == to_space_section || section == nursery_section) {
2802 prev_section = section;
2803 section = section->next;
2806 /* no pinning object, so the section is free */
2807 if (section->pin_queue_start == section->pin_queue_end) {
2808 GCMemSection *to_free;
2810 prev_section->next = section->next;
2812 section_list = section->next;
2814 section = section->next;
2815 free_mem_section (to_free);
2818 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2819 build_section_fragments (section);
2821 prev_section = section;
2822 section = section->next;
2825 /* walk the pin_queue, build up the fragment list of free memory, unmark
2826 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2829 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2831 TV_GETTIME (all_btv);
2832 mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2833 /* prepare the pin queue for the next collection */
2835 if (fin_ready_list || critical_fin_list) {
2836 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2837 mono_gc_finalize_notify ();
2842 * Allocate a new section of memory to be used as old generation.
2844 static GCMemSection*
2845 alloc_section (size_t size)
2847 GCMemSection *section;
2850 size_t new_size = next_section_size;
2852 if (size > next_section_size) {
2854 new_size += pagesize - 1;
2855 new_size &= ~(pagesize - 1);
2857 section_size_used++;
2858 if (section_size_used > 3) {
2859 section_size_used = 0;
2860 next_section_size *= 2;
2861 if (next_section_size > max_section_size)
2862 next_section_size = max_section_size;
2864 section = get_internal_mem (sizeof (GCMemSection));
2865 data = get_os_memory (new_size, TRUE);
2866 section->data = section->next_data = data;
2867 section->size = new_size;
2868 section->end_data = data + new_size;
2869 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2870 total_alloc += new_size;
2871 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc));
2872 section->data = data;
2873 section->size = new_size;
2874 scan_starts = new_size / SCAN_START_SIZE;
2875 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2876 section->num_scan_start = scan_starts;
2877 section->role = MEMORY_ROLE_GEN1;
2879 /* add to the section list */
2880 section->next = section_list;
2881 section_list = section;
2887 free_mem_section (GCMemSection *section)
2889 char *data = section->data;
2890 size_t size = section->size;
2891 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size));
2892 free_os_memory (data, size);
2893 free_internal_mem (section);
2894 total_alloc -= size;
2898 * When deciding if it's better to collect or to expand, keep track
2899 * of how much garbage was reclaimed with the last collection: if it's too
2901 * This is called when we could not allocate a small object.
/*
 * Slow path used when a small object of @size bytes could not be allocated.
 * With a nursery section in place, collect_nursery (@size) is run and then
 * search_fragment_for_size (@size) is asked for a nursery fragment that can
 * hold the request. When no fragment is found the failure is logged, together
 * with the first last_num_pinned entries of pin_queue (debug level 3).
 * NOTE(review): what happens when nursery_section is NULL, and what is done
 * after a failed fragment search, is not visible in this listing - confirm.
 */
2903 static void __attribute__((noinline))
2904 minor_collect_or_expand_inner (size_t size)
2906 int do_minor_collection = 1;
2908 if (!nursery_section) {
2912 if (do_minor_collection) {
2914 collect_nursery (size);
2915 DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage));
2917 /* this also sets the proper pointers for the next allocation */
2918 if (!search_fragment_for_size (size)) {
2920 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2921 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned));
2922 for (i = 0; i < last_num_pinned; ++i) {
2923 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2928 //report_internal_mem_usage ();
2932 * ######################################################################
2933 * ######## Memory allocation from the OS
2934 * ######################################################################
2935 * This section of code deals with getting memory from the OS and
2936 * allocating memory for GC-internal data structures.
2937 * Internal memory can be handled with a freelist for small objects.
2941 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2942 * This must not require any lock.
2945 get_os_memory (size_t size, int activate)
2948 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2950 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
2951 size += pagesize - 1;
2952 size &= ~(pagesize - 1);
2953 ptr = mono_valloc (0, size, prot_flags);
/*
 * Free the memory returned by get_os_memory (), returning it to the OS.
 *
 * @addr: start of the block, as returned by get_os_memory ().
 * @size: size in bytes of the block.
 */
static void
free_os_memory (void *addr, size_t size)
{
	/*
	 * get_os_memory () maps through mono_valloc (): release through the
	 * matching mono_vfree () wrapper instead of calling munmap () directly.
	 */
	mono_vfree (addr, size);
}
/*
 * Debug helper: print to stdout a usage summary of the pinned chunk @chunk.
 * @seq is only echoed as the chunk number in the first output line.
 * The output has one line with the chunk address, size, page count and number
 * of free pages, one line per reported free list with its slot size and item
 * count, and a final line with the free memory total, computed as the free
 * pages plus the items found on the free lists.
 * NOTE(review): free_pages presumably counts the pages whose page_sizes entry
 * is zero and empty free lists are presumably skipped; the statements doing
 * so are not visible in this listing.
 */
2970 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2972 int i, free_pages, num_free, free_mem;
2974 for (i = 0; i < chunk->num_pages; ++i) {
2975 if (!chunk->page_sizes [i])
2978 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2979 free_mem = FREELIST_PAGESIZE * free_pages;
2980 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2981 if (!chunk->free_list [i])
2984 p = chunk->free_list [i];
2989 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2990 free_mem += freelist_sizes [i] * num_free;
2992 printf ("\tfree memory in chunk: %d\n", free_mem);
/*
 * Debug helper: print to stdout a report_pinned_chunk () summary for every
 * chunk on internal_chunk_list, followed by one for every chunk on
 * pinned_chunk_list. The counter passed as sequence number is incremented
 * after each chunk.
 * Declared G_GNUC_UNUSED, so the compiler does not warn when nothing calls it.
 */
2998 static G_GNUC_UNUSED void
2999 report_internal_mem_usage (void) {
3002 printf ("Internal memory usage:\n");
3004 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
3005 report_pinned_chunk (chunk, i++);
3007 printf ("Pinned memory usage:\n");
3009 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
3010 report_pinned_chunk (chunk, i++);
3015 * the array of pointers from @start to @end contains conservative
3016 * pointers to objects inside @chunk: mark each referenced object
/*
 * Process the conservative pointers stored in the array from @start up to
 * (not including) @end; each of them is expected to point inside @chunk.
 *
 * For every address the page index inside the chunk is computed and the
 * address is rounded down to the beginning of the slot containing it, using
 * the slot size recorded in chunk->page_sizes for that page. Slots of page 0
 * are laid out from chunk->start_data, slots of the other pages from the
 * beginning of their page.
 * A slot is handled as an object only when its first word is non-NULL and
 * lies outside the range from chunk->start_data to the end of the chunk:
 * a first word inside the chunk means the slot is on a free list.
 * NOTE(review): the statement that marks the object is not visible in this
 * listing; the debug message suggests the object gets pinned - confirm.
 */
3020 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
3022 for (; start < end; start++) {
3023 char *addr = *start;
3024 int offset = (char*)addr - (char*)chunk;
3025 int page = offset / FREELIST_PAGESIZE;
3026 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
3027 int slot_size = chunk->page_sizes [page];
3029 /* the page is not allocated */
3032 /* would be faster if we restrict the sizes to power of two,
3033 * but that's a waste of memory: need to measure. it could reduce
3034 * fragmentation since there are less pages needed, if for example
3035 * someone interns strings of each size we end up with one page per
3036 * interned string (still this is just ~40 KB): with more fine-grained sizes
3037 * this increases the number of used pages.
3040 obj_offset /= slot_size;
3041 obj_offset *= slot_size;
3042 addr = (char*)chunk->start_data + obj_offset;
3044 obj_offset /= slot_size;
3045 obj_offset *= slot_size;
3046 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
3049 /* if the vtable is inside the chunk it's on the freelist, so skip */
3050 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
3052 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
/*
 * Walk all the chunks on pinned_chunk_list and invoke @callback on every
 * slot that currently holds an object.
 *
 * @callback: called as callback (chunk, object start, slot size, @callback_data).
 * @callback_data: opaque pointer handed unchanged to @callback.
 *
 * Each page of a chunk is walked in steps of the slot size recorded in
 * chunk->page_sizes; page 0 is walked from chunk->start_data instead of from
 * the beginning of the page. A slot is considered an object when its first
 * word (the vtable) is non-NULL and points outside the chunk.
 * NOTE(review): pages with a zero page_sizes entry are presumably skipped;
 * the statement doing so is not visible in this listing.
 */
3058 scan_pinned_objects (void (*callback) (PinnedChunk*, char*, size_t, void*), void *callback_data)
3065 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
3066 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
3067 DEBUG (6, fprintf (gc_debug_file, "Scanning pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
3068 for (i = 0; i < chunk->num_pages; ++i) {
3069 obj_size = chunk->page_sizes [i];
3072 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
3073 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
3074 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
3075 while (p + obj_size <= endp) {
3077 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
3078 /* if the first word (the vtable) is outside the chunk we have an object */
3079 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk))
3080 callback (chunk, (char*)ptr, obj_size, callback_data);
3088 sweep_pinned_objects_callback (PinnedChunk *chunk, char *ptr, size_t size, void *data)
3090 if (object_is_pinned (ptr)) {
3092 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
3094 DEBUG (6, fprintf (gc_debug_file, "Freeing unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
3095 free_pinned_object (chunk, ptr, size);
/*
 * Sweep the pinned chunks: run sweep_pinned_objects_callback () on every
 * object found by scan_pinned_objects (). No callback data is needed, so
 * NULL is passed.
 */
3100 sweep_pinned_objects (void)
3102 scan_pinned_objects (sweep_pinned_objects_callback, NULL);
3106 scan_object_callback (PinnedChunk *chunk, char *ptr, size_t size, char **data)
3108 DEBUG (6, fprintf (gc_debug_file, "Precise object scan of alloc_pinned %p (%s)\n", ptr, safe_name (ptr)));
3109 /* FIXME: Put objects without references into separate chunks
3110 which do not need to be scanned */
3111 scan_object (ptr, data [0], data [1]);
3115 scan_from_pinned_objects (char *addr_start, char *addr_end)
3117 char *data [2] = { addr_start, addr_end };
3118 scan_pinned_objects (scan_object_callback, data);
3122 * Find the slot number in the freelist for memory chunks that
3123 * can contain @size objects.
3126 slot_for_size (size_t size)
3129 /* do a binary search or lookup table later. */
3130 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
3131 if (freelist_sizes [slot] >= size)
3134 g_assert_not_reached ();
3139 * Build a free list for @size memory chunks from the memory area between
3140 * start_page and end_page.
/*
 * Thread the memory between @start_page and @end_page into a singly linked
 * list of items of @size bytes and install it as chunk->free_list [@slot].
 * The free list for @slot must be empty on entry (asserted).
 * Each item stores in its first word the address of the item that follows
 * it @size bytes later.
 * NOTE(review): how the cursor advances and how the list is terminated is
 * not visible in this listing.
 */
3143 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
3147 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
3148 p = (void**)start_page;
3149 end = (void**)(end_page - size);
3150 g_assert (!chunk->free_list [slot]);
3151 chunk->free_list [slot] = p;
3152 while ((char*)p + size <= (char*)end) {
3154 *p = (void*)((char*)p + size);
3158 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
/*
 * Get a new PinnedChunk from the OS, able to hold at least @size bytes.
 *
 * One page is added to @size, the result is rounded up to a multiple of
 * pagesize and raised to PINNED_CHUNK_MIN_SIZE * 2 if smaller.
 * The bookkeeping arrays live inside the chunk itself, right after the
 * PinnedChunk header: first page_sizes (one int per page), then free_list
 * (one pointer per freelist slot); each is followed by rounding to
 * ALLOC_ALIGN, and start_data points just past them.
 * Page 0 is handed right away to the PINNED_FIRST_SLOT_SIZE free list.
 * total_alloc, the heap boundaries and min/max_pinned_chunk_addr are updated.
 * NOTE(review): the result of get_os_memory () is used without a check.
 */
3162 alloc_pinned_chunk (size_t size)
3167 size += pagesize; /* at least one page */
3168 size += pagesize - 1;
3169 size &= ~(pagesize - 1);
3170 if (size < PINNED_CHUNK_MIN_SIZE * 2)
3171 size = PINNED_CHUNK_MIN_SIZE * 2;
3172 chunk = get_os_memory (size, TRUE);
3173 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
3174 total_alloc += size;
3176 /* setup the bookkeeping fields */
3177 chunk->num_pages = size / FREELIST_PAGESIZE;
3178 offset = G_STRUCT_OFFSET (PinnedChunk, data);
3179 chunk->page_sizes = (void*)((char*)chunk + offset);
3180 offset += sizeof (int) * chunk->num_pages;
3181 offset += ALLOC_ALIGN - 1;
3182 offset &= ~(ALLOC_ALIGN - 1);
3183 chunk->free_list = (void*)((char*)chunk + offset);
3184 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
3185 offset += ALLOC_ALIGN - 1;
3186 offset &= ~(ALLOC_ALIGN - 1);
3187 chunk->start_data = (void*)((char*)chunk + offset);
3189 /* allocate the first page to the freelist */
3190 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
3191 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
3192 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size));
3193 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
3194 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
3198 /* assumes freelist for slot is empty, so try to alloc a new page */
/*
 * Take one item for freelist slot @slot out of @chunk.
 * The head of chunk->free_list [@slot] is looked at first. After that the
 * pages of the chunk are walked: a page that gets selected is recorded in
 * page_sizes with the slot size taken from freelist_sizes [@slot] and is
 * turned into a free list for @slot with build_freelist (); the head of the
 * list is then popped.
 * NOTE(review): the page test and the return statements are only partly
 * visible in this listing; presumably the first page with a zero page_sizes
 * entry is used and NULL is returned when the chunk has none left - confirm.
 */
3200 get_chunk_freelist (PinnedChunk *chunk, int slot)
3204 p = chunk->free_list [slot];
3206 chunk->free_list [slot] = *p;
3209 for (i = 0; i < chunk->num_pages; ++i) {
3211 if (chunk->page_sizes [i])
3213 size = freelist_sizes [slot];
3214 chunk->page_sizes [i] = size;
3215 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
3219 p = chunk->free_list [slot];
3221 chunk->free_list [slot] = *p;
/*
 * Allocate @size bytes of pinned memory from the chunks on pinned_chunk_list.
 * The request is mapped to a freelist slot with slot_for_size (). Three
 * strategies are tried in order:
 *  1) pop the head of the free list for that slot in any existing chunk;
 *  2) ask each existing chunk for an item with get_chunk_freelist ();
 *  3) allocate a new chunk, push it on pinned_chunk_list and take the item
 *     from it.
 * NOTE(review): the return statements are not visible in this listing, and
 * a failure of alloc_pinned_chunk () is not handled (see the FIXME below).
 */
3228 alloc_from_freelist (size_t size)
3232 PinnedChunk *pchunk;
3233 slot = slot_for_size (size);
3234 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
3235 g_assert (size <= freelist_sizes [slot]);
3236 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
3237 void **p = pchunk->free_list [slot];
3239 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
3240 pchunk->free_list [slot] = *p;
3244 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
3245 res = get_chunk_freelist (pchunk, slot);
3249 pchunk = alloc_pinned_chunk (size);
3250 /* FIXME: handle OOM */
3251 pchunk->next = pinned_chunk_list;
3252 pinned_chunk_list = pchunk;
3253 res = get_chunk_freelist (pchunk, slot);
3257 /* used for the GC-internal data structures */
3258 /* FIXME: add support for bigger sizes by allocating more than one page
/*
 * Allocate @size bytes of zeroed memory for the GC-internal data structures.
 * The first visible statement returns the result of calloc (1, @size).
 * NOTE(review): the code after that return mirrors alloc_from_freelist ()
 * on internal_chunk_list and looks unreachable or compiled out; the
 * directives that would disable it are not visible in this listing - confirm.
 */
3262 get_internal_mem (size_t size)
3264 return calloc (1, size);
3268 PinnedChunk *pchunk;
3269 slot = slot_for_size (size);
3270 g_assert (size <= freelist_sizes [slot]);
3271 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3272 void **p = pchunk->free_list [slot];
3274 pchunk->free_list [slot] = *p;
3278 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3279 res = get_chunk_freelist (pchunk, slot);
3283 pchunk = alloc_pinned_chunk (size);
3284 /* FIXME: handle OOM */
3285 pchunk->next = internal_chunk_list;
3286 internal_chunk_list = pchunk;
3287 res = get_chunk_freelist (pchunk, slot);
/*
 * Release memory obtained from get_internal_mem ().
 * The visible code looks for the chunk of internal_chunk_list that contains
 * @addr, derives the freelist slot from the slot size recorded for the page
 * @addr falls in, and pushes @addr on the head of that free list. An address
 * not found in any chunk is reported on stdout and hits
 * g_assert_not_reached ().
 * NOTE(review): get_internal_mem () visibly returns calloc () memory, so a
 * matching free () path presumably precedes this chunk code; it is not
 * visible in this listing - confirm.
 */
3293 free_internal_mem (void *addr)
3297 PinnedChunk *pchunk;
3298 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3299 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
3300 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
3301 int offset = (char*)addr - (char*)pchunk;
3302 int page = offset / FREELIST_PAGESIZE;
3303 int slot = slot_for_size (pchunk->page_sizes [page]);
3305 *p = pchunk->free_list [slot];
3306 pchunk->free_list [slot] = p;
3310 printf ("free of %p failed\n", addr);
3311 g_assert_not_reached ();
3316 * ######################################################################
3317 * ######## Object allocation
3318 * ######################################################################
3319 * This section of code deals with allocating memory for objects.
3320 * There are several ways:
3321 * *) allocate large objects
3322 * *) allocate normal objects
3323 * *) fast lock-free allocation
3324 * *) allocation of pinned objects
/*
 * Return the large object @obj (LOSObject header plus data) to the OS.
 * los_memory_usage is decreased by obj->size; total_alloc is decreased by
 * the size of header plus data rounded up to a multiple of pagesize, which
 * is also the size passed to free_os_memory ().
 * The visible code does not unlink @obj from los_object_list: the caller
 * is expected to have done that already.
 */
3328 free_large_object (LOSObject *obj)
3330 size_t size = obj->size;
3331 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size));
3333 los_memory_usage -= size;
3334 size += sizeof (LOSObject);
3335 size += pagesize - 1;
3336 size &= ~(pagesize - 1);
3337 total_alloc -= size;
3339 free_os_memory (obj, size);
3343 * Objects with size >= 64KB are allocated in the large object space.
3344 * They are currently kept track of with a linked list.
3345 * They don't move, so there is no need to pin them during collection
3346 * and we avoid the memcpy overhead.
/*
 * Allocate an object of @size bytes with vtable @vtable in the large
 * object space.
 *
 * When los_memory_usage is above next_los_collection a major collection is
 * run first and the threshold is moved to the new usage plus 5 MB.
 * The object is backed by its own get_os_memory () block holding a LOSObject
 * header followed by the data, with the total rounded up to a multiple of
 * pagesize. The new object is pushed on los_object_list; total_alloc,
 * los_memory_usage and the heap boundaries are updated.
 * NOTE(review): the stores of the object size and vtable and the return
 * statement are not visible in this listing; a failure of get_os_memory ()
 * is not handled (see the FIXME below).
 */
3348 static void* __attribute__((noinline))
3349 alloc_large_inner (MonoVTable *vtable, size_t size)
3354 int just_did_major_gc = FALSE;
3356 if (los_memory_usage > next_los_collection) {
3357 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection));
3358 just_did_major_gc = TRUE;
3360 major_collection ();
3362 /* later increase based on a percent of the heap size */
3363 next_los_collection = los_memory_usage + 5*1024*1024;
3366 alloc_size += sizeof (LOSObject);
3367 alloc_size += pagesize - 1;
3368 alloc_size &= ~(pagesize - 1);
3369 /* FIXME: handle OOM */
3370 obj = get_os_memory (alloc_size, TRUE);
3372 vtslot = (void**)obj->data;
3374 total_alloc += alloc_size;
3375 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
3376 obj->next = los_object_list;
3377 los_object_list = obj;
3378 los_memory_usage += size;
3380 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size));
3384 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
3385 * an object of size @size
3386 * Return FALSE if not found (which means we need a collection)
/*
 * Look in nursery_fragments for the first fragment of at least @size bytes
 * and make it the current allocation area.
 *
 * Before searching, the unused tail of the current area (from nursery_next
 * to nursery_frag_real_end) is zeroed when the clear policy is
 * CLEAR_AT_TLAB_CREATION. A matching fragment is unlinked from the list,
 * nursery_next and nursery_frag_real_end are set to its bounds and the
 * Fragment descriptor is pushed on fragment_freelist.
 * NOTE(review): the return statements are not visible in this listing.
 */
3389 search_fragment_for_size (size_t size)
3391 Fragment *frag, *prev;
3392 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size));
3394 if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3395 /* Clear the remaining space, pinning depends on this */
3396 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
3399 for (frag = nursery_fragments; frag; frag = frag->next) {
3400 if (size <= (frag->fragment_end - frag->fragment_start)) {
3401 /* remove from the list */
3403 prev->next = frag->next;
3405 nursery_fragments = frag->next;
3406 nursery_next = frag->fragment_start;
3407 nursery_frag_real_end = frag->fragment_end;
3409 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
3410 frag->next = fragment_freelist;
3411 fragment_freelist = frag;
3420 * size is already rounded up and we hold the GC lock.
/*
 * Degraded-mode allocation: place an object of @size bytes directly in an
 * old generation section instead of the nursery.
 * The first section other than the nursery with at least @size bytes left
 * between next_data and end_data is used; when there is none, a new section
 * is requested with alloc_section (), asking for four times the nursery size.
 * The section bump pointer is advanced by @size and @size is added to the
 * degraded_mode counter.
 * NOTE(review): the vtable store and the return statement are not visible in
 * this listing; a failure of alloc_section () is not handled.
 */
3423 alloc_degraded (MonoVTable *vtable, size_t size)
3425 GCMemSection *section;
3427 for (section = section_list; section; section = section->next) {
3428 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
3429 p = (void**)section->next_data;
3434 section = alloc_section (nursery_section->size * 4);
3435 /* FIXME: handle OOM */
3436 p = (void**)section->next_data;
3438 section->next_data += size;
3439 degraded_mode += size;
3440 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
3446 * Provide a variant that takes just the vtable for small fixed-size objects.
3447 * The aligned size is already computed and stored in vt->gc_descr.
3448 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
3449 * processing. We can keep track of where objects start, for example,
3450 * so when we scan the thread stacks for pinned objects, we can start
3451 * a search for the pinned object in SCAN_START_SIZE chunks.
/*
 * Allocate a managed object of @size bytes whose vtable is @vtable.
 * @size is rounded up to a multiple of ALLOC_ALIGN and @vtable must carry a
 * GC descriptor (asserted).
 *
 * Visible structure:
 *  - with collect_before_allocs set and a nursery present, a nursery
 *    collection is forced before the allocation (debugging aid);
 *  - fast path: bump tlab_next by @size; the allocation is done when the new
 *    value is still below tlab_temp_end;
 *  - slow path, @size above MAX_SMALL_OBJ_SIZE: alloc_large_inner ();
 *  - slow path, TLAB exhausted (tlab_next at or beyond tlab_real_end):
 *    keep using alloc_degraded () while degraded_mode is set and below
 *    DEFAULT_NURSERY_SIZE; otherwise objects bigger than tlab_size are carved
 *    directly out of the current nursery fragment, and for the others the
 *    TLAB is retired and a new one of tlab_size bytes is taken from the
 *    nursery. Both cases fall back to search_fragment_for_size (),
 *    minor_collect_or_expand_inner () and finally alloc_degraded () when the
 *    current fragment is too small;
 *  - slow path, only tlab_temp_end reached: record a scan start for the
 *    object and move tlab_temp_end forward by up to SCAN_START_SIZE.
 * NOTE(review): locking, the vtable stores and the return statements are not
 * visible in this listing; describe them only after checking the full file.
 */
3454 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
3456 /* FIXME: handle OOM */
3461 size += ALLOC_ALIGN - 1;
3462 size &= ~(ALLOC_ALIGN - 1);
3464 g_assert (vtable->gc_descr);
3466 if (G_UNLIKELY (collect_before_allocs)) {
3469 if (nursery_section) {
3472 update_current_thread_stack (&dummy);
3474 collect_nursery (0);
3476 if (!degraded_mode && !search_fragment_for_size (size)) {
3478 g_assert_not_reached ();
3484 /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */
3486 p = (void**)tlab_next;
3487 /* FIXME: handle overflow */
3488 new_next = (char*)p + size;
3489 tlab_next = new_next;
3491 if (G_LIKELY (new_next < tlab_temp_end)) {
3495 * FIXME: We might need a memory barrier here so the change to tlab_next is
3496 * visible before the vtable store.
3499 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3507 /* there are two cases: the object is too big or we run out of space in the TLAB */
3508 /* we also reach here when the thread does its first allocation after a minor
3509 * collection, since the tlab_ variables are initialized to NULL.
3510 * there can be another case (from ORP), if we cooperate with the runtime a bit:
3511 * objects that need finalizers can have the high bit set in their size
3512 * so the above check fails and we can readily add the object to the queue.
3513 * This avoids taking again the GC lock when registering, but this is moot when
3514 * doing thread-local allocation, so it may not be a good idea.
3517 if (size > MAX_SMALL_OBJ_SIZE) {
3518 /* get ready for possible collection */
3519 update_current_thread_stack (&dummy);
3521 p = alloc_large_inner (vtable, size);
3523 if (tlab_next >= tlab_real_end) {
3525 * Run out of space in the TLAB. When this happens, some amount of space
3526 * remains in the TLAB, but not enough to satisfy the current allocation
3527 * request. Currently, we retire the TLAB in all cases, later we could
3528 * keep it if the remaining space is above a treshold, and satisfy the
3529 * allocation directly from the nursery.
3532 /* when running in degraded mode, we continue allocing that way
3533 * for a while, to decrease the number of useless nursery collections.
3535 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
3536 p = alloc_degraded (vtable, size);
3541 if (size > tlab_size) {
3542 /* Allocate directly from the nursery */
3543 if (nursery_next + size >= nursery_frag_real_end) {
3544 if (!search_fragment_for_size (size)) {
3545 /* get ready for possible collection */
3546 update_current_thread_stack (&dummy);
3547 minor_collect_or_expand_inner (size);
3548 if (degraded_mode) {
3549 p = alloc_degraded (vtable, size);
3556 p = (void*)nursery_next;
3557 nursery_next += size;
3558 if (nursery_next > nursery_frag_real_end) {
3563 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3564 memset (p, 0, size);
3567 DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size)));
3569 if (nursery_next + tlab_size >= nursery_frag_real_end) {
3570 res = search_fragment_for_size (tlab_size);
3572 /* get ready for possible collection */
3573 update_current_thread_stack (&dummy);
3574 minor_collect_or_expand_inner (tlab_size);
3575 if (degraded_mode) {
3576 p = alloc_degraded (vtable, size);
3583 /* Allocate a new TLAB from the current nursery fragment */
3584 tlab_start = nursery_next;
3585 nursery_next += tlab_size;
3586 tlab_next = tlab_start;
3587 tlab_real_end = tlab_start + tlab_size;
3588 tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size);
3590 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3591 memset (tlab_start, 0, tlab_size);
3593 /* Allocate from the TLAB */
3594 p = (void*)tlab_next;
3596 g_assert (tlab_next <= tlab_real_end);
3598 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3601 /* Reached tlab_temp_end */
3603 /* record the scan start so we can find pinned objects more easily */
3604 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3605 /* we just bump tlab_temp_end as well */
3606 tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE);
3607 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end));
3611 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3620 * To be used for interned strings and possibly MonoThread, reflection handles.
3621 * We may want to explicitly free these objects.
/*
 * Allocate an object of @size bytes with vtable @vtable that the collector
 * will never move.
 * @size is rounded up to a multiple of ALLOC_ALIGN. Requests above
 * MAX_FREELIST_SIZE go to the large object space through
 * alloc_large_inner (); smaller ones are taken from the pinned chunk free
 * lists with alloc_from_freelist () and zeroed with memset ().
 * NOTE(review): locking, the vtable store and the return statement are not
 * visible in this listing; allocation failure is not handled.
 */
3624 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
3626 /* FIXME: handle OOM */
3628 size += ALLOC_ALIGN - 1;
3629 size &= ~(ALLOC_ALIGN - 1);
3631 if (size > MAX_FREELIST_SIZE) {
3632 update_current_thread_stack (&p);
3633 /* large objects are always pinned anyway */
3634 p = alloc_large_inner (vtable, size);
3636 p = alloc_from_freelist (size);
3637 memset (p, 0, size);
3639 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3646 * ######################################################################
3647 * ######## Finalization support
3648 * ######################################################################
3652 * this is valid for the nursery: if the object has been forwarded it means it's
3653 * still referenced from a root. If it is pinned it's still alive as well.
3654 * Return TRUE if @obj is ready to be finalized.
/* TRUE when @obj is neither pinned nor forwarded, i.e. nothing kept it alive. */
#define object_is_fin_ready(obj) (!(object_is_pinned (obj) || object_is_forwarded (obj)))
3659 is_critical_finalizer (FinalizeEntry *entry)
3664 if (!mono_defaults.critical_finalizer_object)
3667 obj = entry->object;
3668 class = ((MonoVTable*)LOAD_VTABLE (obj))->klass;
3670 return mono_class_has_parent (class, mono_defaults.critical_finalizer_object);
3674 queue_finalization_entry (FinalizeEntry *entry) {
3675 if (is_critical_finalizer (entry)) {
3676 entry->next = critical_fin_list;
3677 critical_fin_list = entry;
3679 entry->next = fin_ready_list;
3680 fin_ready_list = entry;
/*
 * Process the finalizable objects that live between @start and @end
 * (excluding anything already inside to_space).
 * An object that is fin-ready (neither pinned nor forwarded) has its entry
 * removed from finalizable_hash and queued with queue_finalization_entry ();
 * the object is then copied with copy_object () so that it survives until
 * its finalizer has run. For any other object in range the entry is simply
 * updated with the address returned by copy_object ().
 * num_ready_finalizers and num_registered_finalizers are adjusted for every
 * queued entry.
 * NOTE(review): how prev is maintained while unlinking is not visible in
 * this listing.
 */
3685 finalize_in_range (char *start, char *end)
3687 FinalizeEntry *entry, *prev;
3691 for (i = 0; i < finalizable_hash_size; ++i) {
3693 for (entry = finalizable_hash [i]; entry;) {
3694 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3695 if (object_is_fin_ready (entry->object)) {
3697 FinalizeEntry *next;
3698 /* remove and put in fin_ready_list */
3700 prev->next = entry->next;
3702 finalizable_hash [i] = entry->next;
3704 num_ready_finalizers++;
3705 num_registered_finalizers--;
3706 queue_finalization_entry (entry);
3707 /* Make it survive */
3708 from = entry->object;
3709 entry->object = copy_object (entry->object, start, end);
3710 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3714 /* update pointer */
3715 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3716 entry->object = copy_object (entry->object, start, end);
3720 entry = entry->next;
/*
 * Process the disappearing links whose target lies between @start and @end
 * (excluding anything already inside to_space).
 * When the link does not track resurrection and its target is fin-ready
 * (neither pinned nor forwarded), the entry is unlinked from
 * disappearing_link_hash, freed, and num_disappearing_links is decreased.
 * Otherwise the link is rewritten with the address returned by
 * copy_object (), hidden with the track bit set to FALSE.
 * NOTE(review): the store that clears the link slot itself is not visible in
 * this listing; the debug message indicates the link is nullified - confirm.
 */
3726 null_link_in_range (char *start, char *end)
3728 DisappearingLink *entry, *prev;
3730 for (i = 0; i < disappearing_link_hash_size; ++i) {
3732 for (entry = disappearing_link_hash [i]; entry;) {
3733 char *object = DISLINK_OBJECT (entry);
3734 if (object >= start && object < end && (object < to_space || object >= to_space_end)) {
3735 if (!DISLINK_TRACK (entry) && object_is_fin_ready (object)) {
3736 void **p = entry->link;
3737 DisappearingLink *old;
3739 /* remove from list */
3741 prev->next = entry->next;
3743 disappearing_link_hash [i] = entry->next;
3744 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, object));
3746 free_internal_mem (entry);
3748 num_disappearing_links--;
3751 /* update pointer if it's moved
3752 * FIXME: what if an object is moved earlier?
3755 * resurrection bit to FALSE
3756 * here so that the object can
3757 * be collected in the next
3758 * cycle (i.e. after it was
3761 *entry->link = HIDE_POINTER (copy_object (object, start, end), FALSE);
3762 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->link, DISLINK_OBJECT (entry)));
3766 entry = entry->next;
/*
 * Drop every disappearing link whose target object belongs to @domain.
 * Matching entries are removed from disappearing_link_hash and freed. A link
 * slot that still holds a value at that point is cleared and reported with
 * the g_warning "Disappearing link not freed".
 * NOTE(review): the update of prev and of num_disappearing_links is not
 * visible in this listing.
 */
3772 null_links_for_domain (MonoDomain *domain)
3774 DisappearingLink *entry, *prev;
3776 for (i = 0; i < disappearing_link_hash_size; ++i) {
3778 for (entry = disappearing_link_hash [i]; entry; ) {
3779 char *object = DISLINK_OBJECT (entry);
3780 if (object && mono_object_domain (object) == domain) {
3781 DisappearingLink *next = entry->next;
3786 disappearing_link_hash [i] = next;
3788 if (*(entry->link)) {
3789 *(entry->link) = NULL;
3790 g_warning ("Disappearing link not freed");
3792 free_internal_mem (entry);
3799 entry = entry->next;
3805 * mono_gc_finalizers_for_domain:
3806 * @domain: the unloading appdomain
3807 * @out_array: output array
3808 * @out_size: size of output array
3810 * Store inside @out_array up to @out_size objects that belong to the unloading
3811 * appdomain @domain. Returns the number of stored items. Can be called repeatedly
3812 * until it returns 0.
3813 * The items are removed from the finalizer data structure, so the caller is supposed
3815 * @out_array should be on the stack to allow the GC to know the objects are still alive.
/*
 * Move up to @out_size finalizable objects belonging to @domain from
 * finalizable_hash into @out_array.
 * Nothing is collected when finalization is disabled (no_finalize), when
 * @out_size is zero or when @out_array is NULL. Every stored object has its
 * entry unlinked from the hash and num_registered_finalizers decreased; the
 * walk stops as soon as @out_size objects have been stored.
 * NOTE(review): locking, the release of the unlinked entries and the return
 * statements are not visible in this listing.
 */
3818 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3820 FinalizeEntry *entry, *prev;
3822 if (no_finalize || !out_size || !out_array)
3826 for (i = 0; i < finalizable_hash_size; ++i) {
3828 for (entry = finalizable_hash [i]; entry;) {
3829 if (mono_object_domain (entry->object) == domain) {
3830 FinalizeEntry *next;
3831 /* remove and put in out_array */
3833 prev->next = entry->next;
3835 finalizable_hash [i] = entry->next;
3837 num_registered_finalizers--;
3838 out_array [count ++] = entry->object;
3839 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
3841 if (count == out_size) {
3848 entry = entry->next;
/*
 * Resize finalizable_hash.
 * The new bucket count is g_spaced_primes_closest (num_registered_finalizers).
 * Every entry is re-linked in the bucket selected by mono_object_hash () of
 * its object modulo the new size; the old bucket array is then freed and
 * replaced by the new one.
 */
3856 rehash_fin_table (void)
3860 FinalizeEntry **new_hash;
3861 FinalizeEntry *entry, *next;
3862 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3864 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3865 for (i = 0; i < finalizable_hash_size; ++i) {
3866 for (entry = finalizable_hash [i]; entry; entry = next) {
3867 hash = mono_object_hash (entry->object) % new_size;
3869 entry->next = new_hash [hash];
3870 new_hash [hash] = entry;
3873 free_internal_mem (finalizable_hash);
3874 finalizable_hash = new_hash;
3875 finalizable_hash_size = new_size;
/*
 * Register @obj for finalization, or remove an existing registration.
 * @user_data must be NULL or mono_gc_run_finalize (asserted).
 * The table is grown with rehash_fin_table () once the number of registered
 * finalizers reaches twice the bucket count. An existing entry for @obj can
 * be unlinked and freed; a new entry is pushed on the head of its bucket.
 * NOTE(review): the conditions selecting removal versus insertion are not
 * visible in this listing; judging from the comments below, a NULL
 * @user_data presumably requests deregistration - confirm. Locking and the
 * return statements are not visible either.
 */
3879 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3881 FinalizeEntry *entry, *prev;
3885 g_assert (user_data == NULL || user_data == mono_gc_run_finalize);
3886 hash = mono_object_hash (obj);
3888 if (num_registered_finalizers >= finalizable_hash_size * 2)
3889 rehash_fin_table ();
3890 hash %= finalizable_hash_size;
3892 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3893 if (entry->object == obj) {
3895 /* remove from the list */
3897 prev->next = entry->next;
3899 finalizable_hash [hash] = entry->next;
3900 num_registered_finalizers--;
3901 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3902 free_internal_mem (entry);
3910 /* request to deregister, but already out of the list */
3914 entry = get_internal_mem (sizeof (FinalizeEntry));
3915 entry->object = obj;
3916 entry->next = finalizable_hash [hash];
3917 finalizable_hash [hash] = entry;
3918 num_registered_finalizers++;
3919 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
/*
 * rehash_dislink:
 * Rebuilds the disappearing-link hash table with a bucket count of
 * g_spaced_primes_closest (num_disappearing_links). Entries are rehashed by
 * the address of their link slot (mono_aligned_addr_hash) and pushed on the
 * head of their new bucket; only the bucket array is reallocated.
 * NOTE(review): assumes get_internal_mem returns zeroed memory, since
 * new_hash [hash] is read before being written -- TODO confirm. The return
 * type, the declarations of i/hash and the load of `next` are not visible.
 */
3924 rehash_dislink (void)
3928 DisappearingLink **new_hash;
3929 DisappearingLink *entry, *next;
3930 int new_size = g_spaced_primes_closest (num_disappearing_links);
3932 new_hash = get_internal_mem (new_size * sizeof (DisappearingLink*));
3933 for (i = 0; i < disappearing_link_hash_size; ++i) {
3934 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3935 hash = mono_aligned_addr_hash (entry->link) % new_size;
3937 entry->next = new_hash [hash];
3938 new_hash [hash] = entry;
/* release the old bucket array and publish the new one */
3941 free_internal_mem (disappearing_link_hash);
3942 disappearing_link_hash = new_hash;
3943 disappearing_link_hash_size = new_size;
/*
 * mono_gc_register_disappearing_link:
 * @obj: target object; per the comment below a NULL @obj means "remove"
 * @link: address of the slot that holds the hidden pointer
 * @track: passed through to HIDE_POINTER
 *
 * Keyed by the address of @link. If the link is already registered it is
 * either unlinked and freed, or its slot is rewritten with
 * HIDE_POINTER (obj, track). Otherwise a new DisappearingLink is allocated,
 * the slot is written and the entry is pushed on the head of its bucket.
 * NOTE(review): the branch conditions, the rehash call guarded by the load
 * factor check and any locking are on lines not visible in this listing.
 */
3947 mono_gc_register_disappearing_link (MonoObject *obj, void **link, gboolean track)
3949 DisappearingLink *entry, *prev;
3953 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3955 /* FIXME: add check that link is not in the heap */
3956 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3957 entry = disappearing_link_hash [hash];
3959 for (; entry; entry = entry->next) {
3960 /* link already added */
3961 if (link == entry->link) {
3962 /* NULL obj means remove */
3965 prev->next = entry->next;
3967 disappearing_link_hash [hash] = entry->next;
3968 num_disappearing_links--;
3969 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3970 free_internal_mem (entry);
3973 *link = HIDE_POINTER (obj, track); /* we allow the change of object */
/* link not registered yet: allocate an entry and store the hidden pointer */
3980 entry = get_internal_mem (sizeof (DisappearingLink));
3981 *link = HIDE_POINTER (obj, track);
3983 entry->next = disappearing_link_hash [hash];
3984 disappearing_link_hash [hash] = entry;
3985 num_disappearing_links++;
3986 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
/*
 * mono_gc_invoke_finalizers:
 * Runs the finalizers of the objects queued on fin_ready_list and
 * critical_fin_list, one object per loop iteration. The entry handled in the
 * previous iteration is unlinked from its list and freed at the start of the
 * next one; an entry whose object field is NULL has already been taken.
 * The ready list is searched before the critical list.
 * NOTE(review): the locking around the list manipulation, the loop exit and
 * the return value are on lines not visible in this listing.
 */
3991 mono_gc_invoke_finalizers (void)
3993 FinalizeEntry *entry = NULL;
3994 gboolean entry_is_critical;
3997 /* FIXME: batch to reduce lock contention */
3998 while (fin_ready_list || critical_fin_list) {
4002 FinalizeEntry **list = entry_is_critical ? &critical_fin_list : &fin_ready_list;
4004 /* We have finalized entry in the last
4005 iteration, now we need to remove it from
4008 *list = entry->next;
4010 FinalizeEntry *e = *list;
4011 while (e->next != entry)
4013 e->next = entry->next;
4015 free_internal_mem (entry);
4019 /* Now look for the first non-null entry. */
4020 for (entry = fin_ready_list; entry && !entry->object; entry = entry->next)
4023 entry_is_critical = FALSE;
4025 entry_is_critical = TRUE;
4026 for (entry = critical_fin_list; entry && !entry->object; entry = entry->next)
4031 g_assert (entry->object);
4032 num_ready_finalizers--;
/* claim the object: clearing the field marks the entry as already handled */
4033 obj = entry->object;
4034 entry->object = NULL;
4035 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
4043 g_assert (entry->object == NULL);
4045 /* the object is on the stack so it is pinned */
4046 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
4047 mono_gc_run_finalize (obj, NULL);
/*
 * mono_gc_pending_finalizers:
 * Returns non-zero when either fin_ready_list or critical_fin_list is
 * non-empty, i.e. there are entries waiting for mono_gc_invoke_finalizers.
 */
4054 mono_gc_pending_finalizers (void)
4056 return fin_ready_list || critical_fin_list;
/*
 * mono_gc_add_memory_pressure:
 * @value: amount added to the global memory_pressure counter
 *
 * The update is a plain, non-atomic read-modify-write (see the FIXME).
 */
4059 /* Negative value to remove */
4061 mono_gc_add_memory_pressure (gint64 value)
4063 /* FIXME: Use interlocked functions */
4065 memory_pressure += value;
4070 * ######################################################################
4071 * ######## registered roots support
4072 * ######################################################################
/*
 * rehash_roots:
 * @pinned: index of the root table to rebuild (used to index
 * num_roots_entries, roots_hash and roots_hash_size)
 *
 * Rebuilds that root hash table with g_spaced_primes_closest
 * (num_roots_entries [pinned]) buckets. Records are rehashed by their
 * start_root address and pushed on the head of their new bucket.
 * NOTE(review): assumes get_internal_mem returns zeroed memory, since
 * new_hash [hash] is read before being written -- TODO confirm. The
 * declarations of i/hash/new_size and the load of `next` are not visible.
 */
4076 rehash_roots (gboolean pinned)
4080 RootRecord **new_hash;
4081 RootRecord *entry, *next;
4084 new_size = g_spaced_primes_closest (num_roots_entries [pinned]);
4085 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
4086 for (i = 0; i < roots_hash_size [pinned]; ++i) {
4087 for (entry = roots_hash [pinned][i]; entry; entry = next) {
4088 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
4090 entry->next = new_hash [hash];
4091 new_hash [hash] = entry;
/* release the old bucket array and publish the new one */
4094 free_internal_mem (roots_hash [pinned]);
4095 roots_hash [pinned] = new_hash;
4096 roots_hash_size [pinned] = new_size;
/*
 * find_root:
 * @root_type: which root table to search
 * @start: start address of the root being looked up
 * @addr_hash: precomputed hash of @start, reduced here modulo the table size
 *
 * Walks the bucket chain looking for a record whose start_root equals @start.
 * NOTE(review): the return statements are not visible in this listing; the
 * caller treats the result as the matching RootRecord or NULL.
 */
4100 find_root (int root_type, char *start, guint32 addr_hash)
4102 RootRecord *new_root;
4104 guint32 hash = addr_hash % roots_hash_size [root_type];
4105 for (new_root = roots_hash [root_type][hash]; new_root; new_root = new_root->next) {
4106 /* we allow changing the size and the descriptor (for thread statics etc) */
4107 if (new_root->start_root == start) {
4116 * We do not coalesce roots.
/*
 * mono_gc_register_root_inner:
 * @start: first byte of the root area
 * @size: size of the area in bytes
 * @descr: root descriptor, stored as an mword in the record
 * @root_type: table the new record is added to
 *
 * All root tables are checked for their load factor and then searched for an
 * existing record starting at @start. An existing record gets its end and
 * descriptor updated (the assert requires that a record keeps being either
 * described or undescribed). Otherwise a new RootRecord is allocated and
 * pushed on the head of its bucket in the @root_type table.
 * NOTE(review): locking, the rehash call, the roots_size additions and the
 * return values are on lines not visible in this listing.
 */
4119 mono_gc_register_root_inner (char *start, size_t size, void *descr, int root_type)
4121 RootRecord *new_root;
4122 unsigned int hash, addr_hash = mono_aligned_addr_hash (start);
4125 for (i = 0; i < ROOT_TYPE_NUM; ++i) {
4126 if (num_roots_entries [i] >= roots_hash_size [i] * 2)
4129 for (i = 0; i < ROOT_TYPE_NUM; ++i) {
4130 new_root = find_root (i, start, addr_hash);
4131 /* we allow changing the size and the descriptor (for thread statics etc) */
4133 size_t old_size = new_root->end_root - new_root->start_root;
4134 new_root->end_root = new_root->start_root + size;
4135 g_assert (((new_root->root_desc != 0) && (descr != NULL)) ||
4136 ((new_root->root_desc == 0) && (descr == NULL)));
4137 new_root->root_desc = (mword)descr;
4139 roots_size -= old_size;
/* no existing record: create one and link it into the requested table */
4144 new_root = get_internal_mem (sizeof (RootRecord));
4146 new_root->start_root = start;
4147 new_root->end_root = new_root->start_root + size;
4148 new_root->root_desc = (mword)descr;
4150 hash = addr_hash % roots_hash_size [root_type];
4151 num_roots_entries [root_type]++;
4152 new_root->next = roots_hash [root_type] [hash];
4153 roots_hash [root_type][hash] = new_root;
4154 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * mono_gc_register_root:
 * Registers the area [@start, @start + @size) as a root. A non-NULL @descr
 * selects ROOT_TYPE_NORMAL, a NULL @descr selects ROOT_TYPE_PINNED.
 * Returns whatever mono_gc_register_root_inner returns.
 */
4164 mono_gc_register_root (char *start, size_t size, void *descr)
4166 return mono_gc_register_root_inner (start, size, descr, descr ? ROOT_TYPE_NORMAL : ROOT_TYPE_PINNED);
/*
 * mono_gc_register_root_wbarrier:
 * Same as mono_gc_register_root, but the record always goes into the
 * ROOT_TYPE_WBARRIER table regardless of @descr.
 */
4170 mono_gc_register_root_wbarrier (char *start, size_t size, void *descr)
4172 return mono_gc_register_root_inner (start, size, descr, ROOT_TYPE_WBARRIER);
/*
 * mono_gc_deregister_root:
 * @addr: start address that was used when the root was registered
 *
 * Searches every root table for a record whose start_root equals @addr,
 * unlinks it from its bucket, subtracts its size from roots_size, decrements
 * the table's entry count and frees the record.
 * NOTE(review): the chain-walking loop header, the prev bookkeeping and any
 * locking are on lines not visible in this listing.
 */
4176 mono_gc_deregister_root (char* addr)
4178 RootRecord *tmp, *prev;
4179 unsigned int hash, addr_hash = mono_aligned_addr_hash (addr);
4183 for (root_type = 0; root_type < ROOT_TYPE_NUM; ++root_type) {
4184 hash = addr_hash % roots_hash_size [root_type];
4185 tmp = roots_hash [root_type][hash];
4188 if (tmp->start_root == (char*)addr) {
4190 prev->next = tmp->next;
4192 roots_hash [root_type][hash] = tmp->next;
4193 roots_size -= (tmp->end_root - tmp->start_root);
4194 num_roots_entries [root_type]--;
4195 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
4196 free_internal_mem (tmp);
4207 * ######################################################################
4208 * ######## Thread handling (stop/start code)
4209 * ######################################################################
/*
 * Per-thread GC bookkeeping record. Records are chained through `next` in
 * the buckets of thread_table.
 * NOTE(review): some members used elsewhere in this file (signal,
 * stack_start, stack_end) are declared on lines not visible in this listing.
 */
4212 /* eventually share with MonoThread? */
4213 typedef struct _SgenThreadInfo SgenThreadInfo;
4215 struct _SgenThreadInfo {
/* next record in the same thread_table bucket */
4216 SgenThreadInfo *next;
/* thread identifier, compared with ARCH_THREAD_EQUALS */
4217 ARCH_THREAD_TYPE id;
4218 unsigned int stop_count; /* to catch duplicate signals */
/* addresses of the thread's TLAB variables (tlab_next, tlab_start, ...) */
4223 char **tlab_next_addr;
4224 char **tlab_start_addr;
4225 char **tlab_temp_end_addr;
4226 char **tlab_real_end_addr;
/* head of this thread's chain of remembered sets */
4227 RememberedSet *remset;
/* value returned by gc_callbacks.thread_attach_func, handed back to the callbacks */
4228 gpointer runtime_data;
/*
 * Thread hash table: THREAD_HASH_SIZE buckets of SgenThreadInfo chains.
 * HASH_PTHREAD_T converts the id to unsigned int, drops the low 4 bits and
 * multiplies by 2654435761u; users reduce the result modulo THREAD_HASH_SIZE.
 */
4231 /* FIXME: handle large/small config */
4232 #define THREAD_HASH_SIZE 11
4233 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
4235 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
4237 #if USE_SIGNAL_BASED_START_STOP_WORLD
/* State used by the signal-based stop/start-the-world handshake. */
/* posted by the signal handlers, waited on in thread_handshake */
4239 static sem_t suspend_ack_semaphore;
/* incremented each time the world is stopped */
4240 static unsigned int global_stop_count = 0;
/* signals used to suspend and to restart threads */
4241 static int suspend_signal_num = SIGPWR;
4242 static int restart_signal_num = SIGXCPU;
/* mask passed to sigsuspend while a thread is parked in suspend_handler */
4243 static sigset_t suspend_signal_mask;
/* register snapshot of the collecting thread, scanned conservatively */
4244 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
/*
 * thread_info_lookup:
 * @id: thread identifier to look up
 *
 * Walks the thread_table bucket selected by HASH_PTHREAD_T (id) until a
 * record with a matching id is found or the chain ends.
 * NOTE(review): the loop body and the return statement are not visible in
 * this listing; presumably returns the record, or NULL when not registered.
 */
4246 /* LOCKING: assumes the GC lock is held */
4247 static SgenThreadInfo*
4248 thread_info_lookup (ARCH_THREAD_TYPE id)
4250 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4251 SgenThreadInfo *info;
4253 info = thread_table [hash];
4254 while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
/*
 * update_current_thread_stack:
 * Records the calling thread's current stack position (the aligned address
 * of a local variable) in its SgenThreadInfo, stores the registers through
 * ARCH_STORE_REGS into cur_thread_regs and notifies the runtime through the
 * thread_suspend_func callback, if one is installed.
 * NOTE(review): @start is not used in the visible lines. The result of
 * thread_info_lookup is dereferenced without a NULL check, so the calling
 * thread is presumably always registered -- confirm.
 */
4261 update_current_thread_stack (void *start)
4263 void *ptr = cur_thread_regs;
4264 SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
4265 info->stack_start = align_pointer (&ptr);
4266 ARCH_STORE_REGS (ptr);
4267 if (gc_callbacks.thread_suspend_func)
4268 gc_callbacks.thread_suspend_func (info->runtime_data, NULL);
/*
 * signal_desc:
 * Maps @signum to a description used in debug messages ("%s" argument in
 * thread_handshake), distinguishing the suspend and restart signals.
 * NOTE(review): the returned strings are on lines not visible in this listing.
 */
4272 signal_desc (int signum)
4274 if (signum == suspend_signal_num)
4276 if (signum == restart_signal_num)
/*
 * thread_handshake:
 * @signum: signal to deliver (callers pass the suspend or restart signal)
 *
 * Sends @signum with pthread_kill to every registered thread except the
 * caller, then waits on suspend_ack_semaphore once per counted thread.
 * sem_wait is retried when interrupted (EINTR); any other failure is fatal.
 * NOTE(review): where `count` is initialised/incremented and the return
 * statement are not visible here; callers log the result as a thread count.
 */
4281 /* LOCKING: assumes the GC lock is held */
4283 thread_handshake (int signum)
4285 int count, i, result;
4286 SgenThreadInfo *info;
4287 pthread_t me = pthread_self ();
4290 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4291 for (info = thread_table [i]; info; info = info->next) {
4292 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
4293 if (ARCH_THREAD_EQUALS (info->id, me)) {
4294 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
4297 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
4299 result = pthread_kill (info->id, signum);
4301 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
4304 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
4310 for (i = 0; i < count; ++i) {
4311 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
4312 if (errno != EINTR) {
4313 g_error ("sem_wait ()");
4320 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
/*
 * suspend_handler:
 * Signal handler run in a thread that is being stopped. It publishes the
 * thread's remembered set and an aligned stack position in its
 * SgenThreadInfo, notifies the runtime through thread_suspend_func, posts
 * suspend_ack_semaphore, and then parks in sigsuspend until restart_handler
 * has set info->signal to the restart signal. A second post acknowledges the
 * restart.
 * The duplicate-signal check is disabled by the constant `0 &&`.
 * NOTE(review): errno is saved in old_errno; the matching restore and the
 * reset of info->signal are on lines not visible in this listing.
 */
4322 suspend_handler (int sig, siginfo_t *siginfo, void *context)
4324 SgenThreadInfo *info;
4327 int old_errno = errno;
4329 id = pthread_self ();
4330 info = thread_info_lookup (id);
4331 stop_count = global_stop_count;
4332 /* duplicate signal */
4333 if (0 && info->stop_count == stop_count) {
4337 /* update the remset info in the thread data structure */
4338 info->remset = remembered_set;
4340 * this includes the register values that the kernel put on the stack.
4341 * Write arch-specific code to only push integer regs and a more accurate
4344 info->stack_start = align_pointer (&id);
4346 /* Notify the JIT */
4347 if (gc_callbacks.thread_suspend_func)
4348 gc_callbacks.thread_suspend_func (info->runtime_data, context);
4350 /* notify the waiting thread */
4351 sem_post (&suspend_ack_semaphore);
4352 info->stop_count = stop_count;
4354 /* wait until we receive the restart signal */
4357 sigsuspend (&suspend_signal_mask);
4358 } while (info->signal != restart_signal_num);
4360 /* notify the waiting thread */
4361 sem_post (&suspend_ack_semaphore);
/*
 * restart_handler:
 * Signal handler for the restart signal. It records the restart signal
 * number in the thread's SgenThreadInfo; the loop around sigsuspend in
 * suspend_handler tests that field to decide when to resume.
 * NOTE(review): errno is saved in old_errno; the restore is not visible here.
 */
4367 restart_handler (int sig)
4369 SgenThreadInfo *info;
4370 int old_errno = errno;
4372 info = thread_info_lookup (pthread_self ());
4373 info->signal = restart_signal_num;
/* time stamp taken when the world is stopped; read again in restart_world */
4378 static TV_DECLARE (stop_world_time);
/* longest stop-to-restart pause seen so far, in microseconds */
4379 static unsigned long max_pause_usec = 0;
/*
 * Stops all other registered threads: bumps global_stop_count, records the
 * start time in stop_world_time and performs the handshake with the suspend
 * signal. `count` holds the value returned by thread_handshake.
 * NOTE(review): the function's name/signature line (presumably stop_world)
 * and its return statement are not visible in this listing.
 */
4381 /* LOCKING: assumes the GC lock is held */
4387 global_stop_count++;
4388 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
4389 TV_GETTIME (stop_world_time);
4390 count = thread_handshake (suspend_signal_num);
4391 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
/*
 * restart_world:
 * Resumes the stopped threads by performing the handshake with the restart
 * signal, then measures the time elapsed since stop_world_time and keeps the
 * maximum in max_pause_usec.
 * NOTE(review): the declarations of count/usec and the return statement are
 * not visible in this listing.
 */
4395 /* LOCKING: assumes the GC lock is held */
4397 restart_world (void)
4400 TV_DECLARE (end_sw);
4403 count = thread_handshake (restart_signal_num);
4404 TV_GETTIME (end_sw);
4405 usec = TV_ELAPSED (stop_world_time, end_sw);
4406 max_pause_usec = MAX (usec, max_pause_usec);
4407 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
4411 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
/*
 * mono_gc_set_gc_callbacks:
 * Copies the structure pointed to by @callbacks into the global gc_callbacks,
 * so the caller's structure does not need to outlive the call.
 * @callbacks must not be NULL (it is dereferenced unconditionally).
 */
4414 mono_gc_set_gc_callbacks (MonoGCCallbacks *callbacks)
4416 gc_callbacks = *callbacks;
/* Set by scan_thread_data; read by the two mono_gc_* scan helpers below. */
4419 /* Variables holding start/end nursery so it won't have to be passed at every call */
4420 static void *scan_area_arg_start, *scan_area_arg_end;
/*
 * mono_gc_conservatively_scan_area:
 * Conservatively pins objects referenced from the memory range
 * [@start, @end), using the nursery bounds saved in
 * scan_area_arg_start/scan_area_arg_end.
 */
4423 mono_gc_conservatively_scan_area (void *start, void *end)
4425 conservatively_pin_objects_from (start, end, scan_area_arg_start, scan_area_arg_end);
/*
 * mono_gc_scan_object:
 * Passes @obj to copy_object with the saved nursery bounds and returns its
 * result, which the caller should use in place of @obj.
 */
4429 mono_gc_scan_object (void *obj)
4431 return copy_object (obj, scan_area_arg_start, scan_area_arg_end);
4435 * Mark from thread stacks and registers.
/*
 * scan_thread_data:
 * @start_nursery, @end_nursery: nursery bounds, also saved in
 * scan_area_arg_start/scan_area_arg_end for the runtime callbacks
 * @precise: forwarded to the thread_mark_func callback
 *
 * Visits every registered thread. The stack range
 * [stack_start, stack_end) is handed to gc_callbacks.thread_mark_func and/or
 * scanned with conservatively_pin_objects_from; the saved registers of the
 * collecting thread (cur_thread_regs) are scanned conservatively as well.
 * NOTE(review): the conditions for skipping a thread and for choosing
 * between the callback and the conservative scan are on lines not visible
 * in this listing.
 */
4438 scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise)
4441 SgenThreadInfo *info;
4443 scan_area_arg_start = start_nursery;
4444 scan_area_arg_end = end_nursery;
4446 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4447 for (info = thread_table [i]; info; info = info->next) {
4449 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
4452 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd, pinned=%d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, next_pin_slot));
4453 if (gc_callbacks.thread_mark_func)
4454 gc_callbacks.thread_mark_func (info->runtime_data, info->stack_start, info->stack_end, precise);
4456 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
4459 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers, pinned=%d\n", next_pin_slot));
4461 conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery);
/*
 * find_pinning_ref_from_thread:
 * @obj: start of the object
 * @size: size of the object in bytes
 *
 * Debugging aid: scans every registered thread's stack word by word and
 * logs each slot whose value points into [@obj, @obj + @size).
 * Saved registers are not checked (see the FIXME).
 */
4465 find_pinning_ref_from_thread (char *obj, size_t size)
4468 SgenThreadInfo *info;
4469 char *endobj = obj + size;
4471 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4472 for (info = thread_table [i]; info; info = info->next) {
4473 char **start = (char**)info->stack_start;
4476 while (start < (char**)info->stack_end) {
4477 if (*start >= obj && *start < endobj) {
4478 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end));
4484 /* FIXME: check register */
/*
 * ptr_in_heap:
 * Range check only: addresses outside
 * [lowest_heap_address, highest_heap_address) are rejected. An address
 * inside that range is not otherwise validated (see the FIXME).
 */
4487 /* return TRUE if ptr points inside the managed heap */
4489 ptr_in_heap (void* ptr)
4491 mword p = (mword)ptr;
4492 if (p < lowest_heap_address || p >= highest_heap_address)
4494 /* FIXME: more checks */
/*
 * handle_remset:
 * @p: first word of a remembered-set record
 * @start_nursery, @end_nursery: nursery bounds
 * @global: TRUE when the record comes from the global remset
 *
 * Processes one record; callers use the return value as the address of the
 * next record. The low bits of the first word (REMSET_TYPE_MASK) select the
 * record kind, the remaining bits are the target address:
 *  - location: the slot is updated with copy_object when it lies outside
 *    the nursery and inside the heap;
 *  - range: `count` consecutive slots are updated the same way;
 *  - object: the object at the address is scanned with scan_object;
 *  - REMSET_OTHER: a sub-type word follows; a value-type record is scanned
 *    with scan_vtype, a root location is updated without the heap check.
 * For records that do not come from the global remset, a slot that still
 * points into the nursery after the copy is added to the global remset.
 * NOTE(review): several case labels, the loads of ptr/count/desc and all
 * return statements are on lines not visible in this listing.
 */
4499 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
4505 /* FIXME: exclude stack locations */
4506 switch ((*p) & REMSET_TYPE_MASK) {
4507 case REMSET_LOCATION:
4509 //__builtin_prefetch (ptr);
4510 if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) {
4511 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4512 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
4513 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4515 * If the object is pinned, each reference to it from nonpinned objects
4516 * becomes part of the global remset, which can grow very large.
4518 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4519 add_to_global_remset (ptr, FALSE);
4522 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
4526 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4527 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4530 while (count-- > 0) {
4531 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4532 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
4533 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
4534 add_to_global_remset (ptr, FALSE);
4539 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4540 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4542 scan_object (*ptr, start_nursery, end_nursery);
4544 case REMSET_OTHER: {
4545 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4549 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4552 scan_vtype ((char*)ptr, desc, start_nursery, end_nursery);
4554 case REMSET_ROOT_LOCATION:
4555 /* Same as REMSET_LOCATION, but the address is not required to be in the heap */
4556 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4557 DEBUG (9, fprintf (gc_debug_file, "Overwrote root location remset at %p with %p\n", ptr, *ptr));
4558 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4560 * If the object is pinned, each reference to it from nonpinned objects
4561 * becomes part of the global remset, which can grow very large.
4563 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4564 add_to_global_remset (ptr, TRUE);
4568 g_assert_not_reached ();
4573 g_assert_not_reached ();
/*
 * scan_from_remsets:
 * Processes every remembered-set record with handle_remset.
 *
 * Global remsets: after a record is handled it is kept (compacted towards
 * the start of the buffer through store_pos) only while the slot it names
 * still points into the nursery; the buffer is then truncated. Only plain
 * location records and REMSET_OTHER/REMSET_ROOT_LOCATION records are
 * expected there (asserted).
 *
 * Per-thread remsets: all records are handled, each buffer is emptied, and
 * every buffer except the one the thread currently points to is freed.
 */
4579 scan_from_remsets (void *start_nursery, void *end_nursery)
4582 SgenThreadInfo *info;
4583 RememberedSet *remset, *next;
4584 mword *p, *next_p, *store_pos;
4586 /* the global one */
4587 for (remset = global_remset; remset; remset = remset->next) {
4588 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4589 store_pos = remset->data;
4590 for (p = remset->data; p < remset->store_next; p = next_p) {
4593 next_p = handle_remset (p, start_nursery, end_nursery, TRUE);
4596 * Clear global remsets of locations which no longer point to the
4597 * nursery. Otherwise, they could grow indefinitely between major
4600 ptr = (p [0] & ~REMSET_TYPE_MASK);
4601 if ((p [0] & REMSET_TYPE_MASK) == REMSET_LOCATION) {
4602 if (ptr_in_nursery (*(void**)ptr))
4603 *store_pos ++ = p [0];
4605 g_assert ((p [0] & REMSET_TYPE_MASK) == REMSET_OTHER);
4606 g_assert (p [1] == REMSET_ROOT_LOCATION);
4607 if (ptr_in_nursery (*(void**)ptr)) {
4608 *store_pos ++ = p [0];
4609 *store_pos ++ = p [1];
4614 /* Truncate the remset */
4615 remset->store_next = store_pos;
4618 /* the per-thread ones */
4619 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4620 for (info = thread_table [i]; info; info = info->next) {
4621 for (remset = info->remset; remset; remset = next) {
4622 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4623 for (p = remset->data; p < remset->store_next;) {
4624 p = handle_remset (p, start_nursery, end_nursery, FALSE);
/* empty the buffer and detach it from the chain before possibly freeing it */
4626 remset->store_next = remset->data;
4627 next = remset->next;
4628 remset->next = NULL;
4629 if (remset != info->remset) {
4630 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4631 free_internal_mem (remset);
4639 * Clear the info in the remembered sets: we're doing a major collection, so
4640 * the per-thread ones are not needed and the global ones will be reconstructed
/*
 * clear_remsets:
 * Discards the contents of all remembered sets without processing them.
 * Every buffer is emptied and unlinked; only the head buffer of the global
 * list and the head buffer of each thread's list are kept, the rest are freed.
 */
4644 clear_remsets (void)
4647 SgenThreadInfo *info;
4648 RememberedSet *remset, *next;
4650 /* the global list */
4651 for (remset = global_remset; remset; remset = next) {
4652 remset->store_next = remset->data;
4653 next = remset->next;
4654 remset->next = NULL;
4655 if (remset != global_remset) {
4656 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4657 free_internal_mem (remset);
4660 /* the per-thread ones */
4661 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4662 for (info = thread_table [i]; info; info = info->next) {
4663 for (remset = info->remset; remset; remset = next) {
4664 remset->store_next = remset->data;
4665 next = remset->next;
4666 remset->next = NULL;
4667 if (remset != info->remset) {
4668 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4669 free_internal_mem (remset);
4677 * Clear the thread local TLAB variables for all threads.
/*
 * Resets the four TLAB variables of every registered thread to NULL by
 * writing through the addresses stored in its SgenThreadInfo.
 * NOTE(review): the function's name/signature line (presumably clear_tlabs)
 * is not visible in this listing.
 */
4682 SgenThreadInfo *info;
4685 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4686 for (info = thread_table [i]; info; info = info->next) {
4687 /* A new TLAB will be allocated when the thread does its first allocation */
4688 *info->tlab_start_addr = NULL;
4689 *info->tlab_next_addr = NULL;
4690 *info->tlab_temp_end_addr = NULL;
4691 *info->tlab_real_end_addr = NULL;
4697 * Find the tlab_next value of the TLAB which contains ADDR.
/*
 * find_tlab_next_from_address:
 * @addr: address to look up
 *
 * Returns the tlab_next value of the first registered thread whose used TLAB
 * range [tlab_start, tlab_next) contains @addr.
 * NOTE(review): the value returned when no TLAB matches is on a line not
 * visible in this listing.
 */
4700 find_tlab_next_from_address (char *addr)
4702 SgenThreadInfo *info;
4705 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4706 for (info = thread_table [i]; info; info = info->next) {
4707 if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr)
4708 return *info->tlab_next_addr;
/*
 * gc_register_current_thread:
 * @addr: an address on the calling thread's stack; used to estimate the
 * stack end when no pthread stack query is available
 *
 * Allocates an SgenThreadInfo for the calling thread, records the addresses
 * of its TLAB variables, determines the stack end, links the record into
 * thread_table, allocates the thread's first remembered set and calls the
 * runtime's thread_attach_func callback if one is installed.
 * NOTE(review): no check of the malloc result is visible in this listing --
 * confirm that allocation failure is handled. The return statement is not
 * visible either; the callers use the result as the new record.
 */
4715 /* LOCKING: assumes the GC lock is held */
4716 static SgenThreadInfo*
4717 gc_register_current_thread (void *addr)
4720 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
4723 info->id = ARCH_GET_THREAD ();
4724 info->stop_count = -1;
4727 info->stack_start = NULL;
4728 info->tlab_start_addr = &tlab_start;
4729 info->tlab_next_addr = &tlab_next;
4730 info->tlab_temp_end_addr = &tlab_temp_end;
4731 info->tlab_real_end_addr = &tlab_real_end;
4733 tlab_next_addr = &tlab_next;
4735 /* try to get it with attributes first */
4736 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
4740 pthread_attr_t attr;
4741 pthread_getattr_np (pthread_self (), &attr);
4742 pthread_attr_getstack (&attr, &sstart, &size);
/* the stack end is the highest address: lowest address plus size */
4743 info->stack_end = (char*)sstart + size;
4744 pthread_attr_destroy (&attr);
4746 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
4747 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
4750 /* FIXME: we assume the stack grows down */
/* fallback: round @addr up to the next 4096-byte boundary */
4751 gsize stack_bottom = (gsize)addr;
4752 stack_bottom += 4095;
4753 stack_bottom &= ~4095;
4754 info->stack_end = (char*)stack_bottom;
4758 /* hash into the table */
4759 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
4760 info->next = thread_table [hash];
4761 thread_table [hash] = info;
4763 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
4764 pthread_setspecific (remembered_set_key, remembered_set);
4765 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
4767 if (gc_callbacks.thread_attach_func)
4768 info->runtime_data = gc_callbacks.thread_attach_func ();
/*
 * unregister_current_thread:
 * Finds the calling thread's SgenThreadInfo in thread_table, unlinks it from
 * its bucket and frees the thread's remembered-set buffers. Records still
 * stored in those buffers are dropped (see the FIXME).
 * NOTE(review): the debug message is printed before the search loop, so it
 * shows the head of the bucket rather than the matching record, and it
 * dereferences `p` before any visible NULL check -- confirm against the
 * complete source. The freeing of the record itself is not visible here.
 */
4774 unregister_current_thread (void)
4777 SgenThreadInfo *prev = NULL;
4779 RememberedSet *rset;
4780 ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
4782 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4783 p = thread_table [hash];
4785 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
4786 while (!ARCH_THREAD_EQUALS (p->id, id)) {
4791 thread_table [hash] = p->next;
4793 prev->next = p->next;
4796 /* FIXME: transfer remsets if any */
4798 RememberedSet *next = rset->next;
4799 free_internal_mem (rset);
/*
 * unregister_thread:
 * Wrapper that unregisters the calling thread; @k is not used in the visible
 * lines. NOTE(review): presumably installed as a pthread key destructor and
 * takes the GC lock around the call -- neither is visible in this listing.
 */
4806 unregister_thread (void *k)
4809 unregister_current_thread ();
/*
 * mono_gc_register_thread:
 * @baseptr: address on the calling thread's stack, forwarded to
 * gc_register_current_thread
 *
 * Looks up the calling thread and registers it when needed. Returns non-zero
 * when a thread record exists afterwards.
 * NOTE(review): the condition guarding the registration and the locking are
 * on lines not visible in this listing.
 */
4814 mono_gc_register_thread (void *baseptr)
4816 SgenThreadInfo *info;
4818 info = thread_info_lookup (ARCH_GET_THREAD ());
4820 info = gc_register_current_thread (baseptr);
4822 return info != NULL;
4825 #if USE_PTHREAD_INTERCEPT
4827 #undef pthread_create
4829 #undef pthread_detach
/*
 * SgenThreadStartInfo: argument block handed from mono_gc_pthread_create to
 * gc_start_thread. Besides start_routine it carries `arg` and the semaphore
 * `registered`, whose declarations are not visible in this listing.
 */
/* the thread function supplied by the caller of mono_gc_pthread_create */
4832 void *(*start_routine) (void *);
4836 } SgenThreadStartInfo;
/*
 * gc_start_thread:
 * @arg: SgenThreadStartInfo prepared by mono_gc_pthread_create
 *
 * Entry point of threads created through mono_gc_pthread_create. It copies
 * the user's function and argument out of @arg, registers the thread with
 * the GC (using the address of a local as the stack hint), posts the
 * `registered` semaphore so the creator can continue, and then runs the
 * user's function.
 * NOTE(review): the unregister call below follows a line that reads like
 * the inside of a block comment whose delimiters are not visible here, so
 * it may be commented out in the complete source -- confirm.
 */
4839 gc_start_thread (void *arg)
4841 SgenThreadStartInfo *start_info = arg;
4842 SgenThreadInfo* info;
4843 void *t_arg = start_info->arg;
4844 void *(*start_func) (void*) = start_info->start_routine;
4848 info = gc_register_current_thread (&result);
4850 sem_post (&(start_info->registered));
4851 result = start_func (t_arg);
4853 * this is done by the pthread key dtor
4855 unregister_current_thread ();
/*
 * mono_gc_pthread_create:
 * Replacement for pthread_create taking the same arguments. The new thread
 * starts in gc_start_thread, which registers it with the GC before calling
 * @start_routine with @arg. The creator waits on the `registered` semaphore
 * (retrying whenever sem_wait fails) and then destroys it.
 * NOTE(review): no check of the malloc result, the handling of a failed
 * pthread_create, the release of start_info and the return statement are
 * visible in this listing.
 */
4863 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
4865 SgenThreadStartInfo *start_info;
4868 start_info = malloc (sizeof (SgenThreadStartInfo));
4871 sem_init (&(start_info->registered), 0, 0);
4872 start_info->arg = arg;
4873 start_info->start_routine = start_routine;
4875 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
4877 while (sem_wait (&(start_info->registered)) != 0) {
4878 /*if (EINTR != errno) ABORT("sem_wait failed"); */
4881 sem_destroy (&(start_info->registered));
/*
 * mono_gc_pthread_join:
 * Plain pass-through to pthread_join; returns its result unchanged.
 */
4887 mono_gc_pthread_join (pthread_t thread, void **retval)
4889 return pthread_join (thread, retval);
/*
 * mono_gc_pthread_detach:
 * Plain pass-through to pthread_detach; returns its result unchanged.
 */
4893 mono_gc_pthread_detach (pthread_t thread)
4895 return pthread_detach (thread);
4898 #endif /* USE_PTHREAD_INTERCEPT */
4901 * ######################################################################
4902 * ######## Write barriers
4903 * ######################################################################
/*
 * alloc_remset:
 * @size: capacity of the new remembered set, in pointer-sized words
 * @id: only used in the debug message
 *
 * Allocates a RememberedSet header followed by @size words of storage and
 * initialises it as empty (store_next at the start, end_set one past the
 * last word).
 * NOTE(review): the initialisation of res->next and the return statement
 * are on lines not visible in this listing.
 */
4906 static RememberedSet*
4907 alloc_remset (int size, gpointer id) {
4908 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
4909 res->store_next = res->data;
4910 res->end_set = res->data + size;
4912 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
4917 * Note: the write barriers first do the needed GC work and then do the actual store:
4918 * this way the value is visible to the conservative GC scan after the write barrier
4919 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
4920 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
4921 * happens when we need to record which pointers contain references to the new generation.
4922 * The write barrier will be executed, but the pointer is still not stored.
/*
 * mono_gc_wbarrier_set_field:
 * @obj: object containing the field (not used in the visible lines)
 * @field_ptr: address of the field being written
 * @value: reference to store
 *
 * Stores @value into the field. When the field itself lies in the nursery
 * no record is needed. Otherwise the field address is appended to the
 * current thread's remembered set as a location record before the store;
 * when the buffer is full a new one of the same capacity is allocated,
 * linked in front of the old one and installed for the thread.
 * NOTE(review): the return statements separating the three paths are on
 * lines not visible in this listing.
 */
4925 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
4928 if (ptr_in_nursery (field_ptr)) {
4929 *(void**)field_ptr = value;
4932 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
4933 rs = remembered_set;
4934 if (rs->store_next < rs->end_set) {
4935 *(rs->store_next++) = (mword)field_ptr;
4936 *(void**)field_ptr = value;
/* buffer full: chain a fresh buffer of the same capacity */
4939 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4940 rs->next = remembered_set;
4941 remembered_set = rs;
4942 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4943 *(rs->store_next++) = (mword)field_ptr;
4944 *(void**)field_ptr = value;
/*
 * mono_gc_wbarrier_set_arrayref:
 * @arr: array containing the slot (not used in the visible lines)
 * @slot_ptr: address of the element being written
 * @value: reference to store
 *
 * Same protocol as mono_gc_wbarrier_set_field: a slot inside the nursery is
 * written directly; otherwise the slot address is recorded as a location
 * record in the thread's remembered set (growing the chain when the current
 * buffer is full) and then written.
 * NOTE(review): the return statements separating the paths are not visible.
 */
4948 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
4950 RememberedSet *rs = remembered_set;
4951 if (ptr_in_nursery (slot_ptr)) {
4952 *(void**)slot_ptr = value;
4955 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
4956 if (rs->store_next < rs->end_set) {
4957 *(rs->store_next++) = (mword)slot_ptr;
4958 *(void**)slot_ptr = value;
/* buffer full: chain a fresh buffer of the same capacity */
4961 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4962 rs->next = remembered_set;
4963 remembered_set = rs;
4964 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4965 *(rs->store_next++) = (mword)slot_ptr;
4966 *(void**)slot_ptr = value;
/*
 * mono_gc_wbarrier_arrayref_copy:
 * @arr: destination array (not used in the visible lines)
 * @slot_ptr: address of the first destination slot
 * @count: number of slots
 *
 * Records a range of slots with a two-word record: the slot address tagged
 * with REMSET_RANGE, followed by @count. Nothing is recorded when
 * @slot_ptr lies in the nursery. No copying is done in the visible lines.
 * The capacity check requires room for both words.
 */
4970 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
4972 RememberedSet *rs = remembered_set;
4973 if (ptr_in_nursery (slot_ptr))
4975 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
4976 if (rs->store_next + 1 < rs->end_set) {
4977 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4978 *(rs->store_next++) = count;
/* buffer full: chain a fresh buffer of the same capacity */
4981 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4982 rs->next = remembered_set;
4983 remembered_set = rs;
4984 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4985 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4986 *(rs->store_next++) = count;
/*
 * mono_gc_wbarrier_generic_store:
 * @ptr: address being written
 * @value: reference to store (may be NULL; the debug message handles that)
 *
 * Stores @value at @ptr. When @ptr lies in the nursery nothing is recorded.
 * Otherwise @ptr is appended to the thread's remembered set as a location
 * record before the store, growing the chain when the buffer is full.
 * Whether @ptr is inside the heap at all is not checked (see the FIXME).
 * NOTE(review): the return statements separating the paths are not visible.
 */
4990 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4993 if (ptr_in_nursery (ptr)) {
4994 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4995 *(void**)ptr = value;
4998 rs = remembered_set;
4999 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
5000 /* FIXME: ensure it is on the heap */
5001 if (rs->store_next < rs->end_set) {
5002 *(rs->store_next++) = (mword)ptr;
5003 *(void**)ptr = value;
/* buffer full: chain a fresh buffer of the same capacity */
5006 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
5007 rs->next = remembered_set;
5008 remembered_set = rs;
5009 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
5010 *(rs->store_next++) = (mword)ptr;
5011 *(void**)ptr = value;
/*
 * mono_gc_wbarrier_set_root:
 * @ptr: address of the root slot being written
 * @value: reference to store
 *
 * Records the write with a two-word record (the address tagged with
 * REMSET_OTHER, then REMSET_ROOT_LOCATION) and stores @value at @ptr.
 * NOTE(review): the capacity check asks for `store_next + 2 < end_set`
 * although only two words are written, so the last slot of a buffer is
 * never used by this path; harmless but stricter than needed.
 * NOTE(review): what happens when @ptr lies in the nursery is on a line not
 * visible in this listing.
 */
5015 mono_gc_wbarrier_set_root (gpointer ptr, MonoObject *value)
5017 RememberedSet *rs = remembered_set;
5018 if (ptr_in_nursery (ptr))
5020 DEBUG (8, fprintf (gc_debug_file, "Adding root remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
5022 if (rs->store_next + 2 < rs->end_set) {
5023 *(rs->store_next++) = (mword)ptr | REMSET_OTHER;
5024 *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION;
5025 *(void**)ptr = value;
/* buffer full: chain a fresh buffer of the same capacity */
5028 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
5029 rs->next = remembered_set;
5030 remembered_set = rs;
5031 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
5032 *(rs->store_next++) = (mword)ptr | REMSET_OTHER;
5033 *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION;
5035 *(void**)ptr = value;
/*
 * mono_gc_wbarrier_value_copy:
 * @dest: destination address of the value-type copy
 * @src: source address (not used in the visible lines)
 * @count: number of values; only used in the debug message in the visible
 * lines, it is not stored in the record
 * @klass: class of the value type; its gc_descr is stored in the record
 *
 * Records the copy with a three-word record: @dest tagged with
 * REMSET_OTHER, then REMSET_VTYPE, then the class GC descriptor. Nothing is
 * recorded when @dest lies in the nursery. The capacity check requires room
 * for all three words.
 * NOTE(review): the copy of the data itself is not visible in this listing.
 */
5039 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
5041 RememberedSet *rs = remembered_set;
5042 if (ptr_in_nursery (dest))
5044 DEBUG (8, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
5046 if (rs->store_next + 2 < rs->end_set) {
5047 *(rs->store_next++) = (mword)dest | REMSET_OTHER;
5048 *(rs->store_next++) = (mword)REMSET_VTYPE;
5049 *(rs->store_next++) = (mword)klass->gc_descr;
/* buffer full: chain a fresh buffer of the same capacity */
5052 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
5053 rs->next = remembered_set;
5054 remembered_set = rs;
5055 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
5056 *(rs->store_next++) = (mword)dest | REMSET_OTHER;
5057 *(rs->store_next++) = (mword)REMSET_VTYPE;
5058 *(rs->store_next++) = (mword)klass->gc_descr;
5062 * mono_gc_wbarrier_object:
5064 * Write barrier to call when obj is the result of a clone or copy of an object.
/*
 * mono_gc_wbarrier_object:
 * @obj: object whose fields were written in bulk
 *
 * Appends a one-word record (the object address tagged with REMSET_OBJECT)
 * to the thread's remembered set, growing the chain when the current buffer
 * is full. Unlike the other barriers, no nursery check on @obj is visible.
 */
5067 mono_gc_wbarrier_object (MonoObject* obj)
5069 RememberedSet *rs = remembered_set;
5070 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
5071 if (rs->store_next < rs->end_set) {
5072 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
/* buffer full: chain a fresh buffer of the same capacity */
5075 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
5076 rs->next = remembered_set;
5077 remembered_set = rs;
5078 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
5079 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
5083 * ######################################################################
5084 * ######## Collector debugging
5085 * ######################################################################
/*
 * Names of the GC descriptor types, indexed by the descriptor type number;
 * describe_ptr prints descriptor_types [type].
 * NOTE(review): the initialiser entries are not visible in this listing.
 */
5088 const char*descriptor_types [] = {
/*
 * describe_ptr:
 * @ptr: address to describe, treated as the start of an object
 *
 * Debugging helper that prints to stdout where @ptr lives (nursery, an old
 * space section, a pinned chunk, or unknown), whether the object is pinned
 * or forwarded, and its vtable, class name and GC descriptor. A NULL vtable
 * or one pointing into the nursery is reported as invalid.
 * NOTE(review): "forwared" in the output below is a typo for "forwarded";
 * it is a runtime string and is left unchanged here.
 * NOTE(review): desc is printed with "%lx"; if it is an mword this only
 * matches where mword has the width of unsigned long -- confirm.
 * NOTE(review): the early returns and the computation of `type` are on
 * lines not visible in this listing.
 */
5100 describe_ptr (char *ptr)
5102 GCMemSection *section;
5107 if (ptr_in_nursery (ptr)) {
5108 printf ("Pointer inside nursery.\n");
5110 for (section = section_list; section;) {
5111 if (ptr >= section->data && ptr < section->data + section->size)
5113 section = section->next;
5117 printf ("Pointer inside oldspace.\n");
5118 } else if (obj_is_from_pinned_alloc (ptr)) {
5119 printf ("Pointer is inside a pinned chunk.\n");
5121 printf ("Pointer unknown.\n");
5126 if (object_is_pinned (ptr))
5127 printf ("Object is pinned.\n");
5129 if (object_is_forwarded (ptr))
5130 printf ("Object is forwared.\n");
5132 // FIXME: Handle pointers to the inside of objects
5133 vtable = (MonoVTable*)LOAD_VTABLE (ptr);
5135 printf ("VTable: %p\n", vtable);
5136 if (vtable == NULL) {
5137 printf ("VTable is invalid (empty).\n");
5140 if (ptr_in_nursery (vtable)) {
5141 printf ("VTable is invalid (points inside nursery).\n");
5144 printf ("Class: %s\n", vtable->klass->name);
5146 desc = ((GCVTable*)vtable)->desc;
5147 printf ("Descriptor: %lx\n", desc);
5150 printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]);
/*
 * find_in_remset_loc:
 *
 * Examines the remembered-set entry starting at P and sets *FOUND when ADDR
 * is covered by that entry. The entry kind is taken from the low bits of *P
 * (REMSET_TYPE_MASK). The return value is used by find_in_remsets () as the
 * position of the next entry to examine.
 */
5154 find_in_remset_loc (mword *p, char *addr, gboolean *found)
5160 switch ((*p) & REMSET_TYPE_MASK) {
5161 case REMSET_LOCATION:
/* A single location: the entry is the address itself. */
5162 if (*p == (mword)addr)
/* A run of slots: ADDR matches if it lies within COUNT pointer slots from PTR. */
5166 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
5168 if ((void**)addr >= ptr && (void**)addr < ptr + count)
/*
 * A whole object: the number of covered slots is the object size rounded up
 * to ALLOC_ALIGN and converted to words. The mask must be inverted when
 * rounding up; without the '~' only the low bits survive and the range
 * collapses to (almost) nothing, so matching addresses were missed.
 */
5172 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
5173 count = safe_object_get_size ((MonoObject*)ptr);
5174 count += (ALLOC_ALIGN - 1);
5175 count &= ~(ALLOC_ALIGN - 1);
5176 count /= sizeof (mword);
5177 if ((void**)addr >= ptr && (void**)addr < ptr + count)
5180 case REMSET_OTHER: {
5183 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
/* Only run-length descriptors are handled here; anything else asserts. */
5186 switch (desc & 0x7) {
5187 case DESC_TYPE_RUN_LENGTH:
5188 OBJ_RUN_LEN_SIZE (skip_size, desc, ptr);
5189 /* The descriptor includes the size of MonoObject */
5190 skip_size -= sizeof (MonoObject);
5191 if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer)))
5196 g_assert_not_reached ();
5200 case REMSET_ROOT_LOCATION:
5203 g_assert_not_reached ();
5208 g_assert_not_reached ();
5214 * Return whether ADDR occurs in the remembered sets
/*
 * Searches every remembered set for ADDR: first the chain starting at
 * global_remset, then the remset chain of each thread in thread_table.
 * Each entry is handed to find_in_remset_loc (), which advances the cursor
 * and raises the local `found' flag on a match.
 */
5217 find_in_remsets (char *addr)
5220 SgenThreadInfo *info;
5221 RememberedSet *remset;
5223 gboolean found = FALSE;
5225 /* the global one */
5226 for (remset = global_remset; remset; remset = remset->next) {
5227 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
/* No increment in the for header: find_in_remset_loc () returns the next position. */
5228 for (p = remset->data; p < remset->store_next;) {
5229 p = find_in_remset_loc (p, addr, &found);
5234 /* the per-thread ones */
5235 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
5236 for (info = thread_table [i]; info; info = info->next) {
5237 for (remset = info->remset; remset; remset = remset->next) {
5238 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
5239 for (p = remset->data; p < remset->store_next;) {
5240 p = find_in_remset_loc (p, addr, &found);
/*
 * Per-reference check used by check_remsets_for_area (): if the slot PTR holds
 * a non-NULL value inside [nursery_start, nursery_next) and the slot's address
 * is not found in any remembered set, log the offending reference and abort.
 */
5252 #define HANDLE_PTR(ptr,obj) do { \
5253 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
5254 if (!find_in_remsets ((char*)(ptr))) { \
5255 fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
5256 g_assert_not_reached (); \
5262 * Check that each object reference inside the area which points into the nursery
5263 * can be found in the remembered sets.
/*
 * Walks the objects laid out in [START, END). For each object the descriptor
 * type selects how the object size (skip_size) is computed and which
 * OBJ_*_FOREACH_PTR macro visits its reference slots.
 * NOTE(review): the FOREACH macros presumably expand to HANDLE_PTR for every
 * reference slot - confirm against their definitions.
 */
5265 static void __attribute__((noinline))
5266 check_remsets_for_area (char *start, char *end)
5271 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
5273 new_obj_references = 0;
5274 obj_references_checked = 0;
5275 while (start < end) {
/* A zero first word means there is no object here; step over one word. */
5276 if (!*(void**)start) {
5277 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
5280 vt = (GCVTable*)LOAD_VTABLE (start);
5281 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
5283 MonoObject *obj = (MonoObject*)start;
5284 g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
/* Strings: only the size is computed. */
5288 if (type == DESC_TYPE_STRING) {
5289 STRING_SIZE (skip_size, start);
5293 } else if (type == DESC_TYPE_RUN_LENGTH) {
5294 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
5295 g_assert (skip_size);
5296 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
5300 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
/* Size is the object size rounded up to ALLOC_ALIGN. */
5301 skip_size = safe_object_get_size ((MonoObject*)start);
5302 skip_size += (ALLOC_ALIGN - 1);
5303 skip_size &= ~(ALLOC_ALIGN - 1);
5304 OBJ_VECTOR_FOREACH_PTR (vt, start);
5305 if (((MonoArray*)start)->bounds) {
5306 /* account for the bounds */
5307 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
5312 } else if (type == DESC_TYPE_SMALL_BITMAP) {
5313 OBJ_BITMAP_SIZE (skip_size, desc, start);
5314 g_assert (skip_size);
5315 OBJ_BITMAP_FOREACH_PTR (desc,start);
5319 } else if (type == DESC_TYPE_LARGE_BITMAP) {
5320 skip_size = safe_object_get_size ((MonoObject*)start);
5321 skip_size += (ALLOC_ALIGN - 1);
5322 skip_size &= ~(ALLOC_ALIGN - 1);
5323 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
5327 } else if (type == DESC_TYPE_COMPLEX) {
5328 /* this is a complex object */
5329 skip_size = safe_object_get_size ((MonoObject*)start);
5330 skip_size += (ALLOC_ALIGN - 1);
5331 skip_size &= ~(ALLOC_ALIGN - 1);
5332 OBJ_COMPLEX_FOREACH_PTR (vt, start);
5336 } else if (type == DESC_TYPE_COMPLEX_ARR) {
5337 /* this is an array of complex structs */
/* Size = element size * length + array header, rounded up to ALLOC_ALIGN. */
5338 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
5339 skip_size *= mono_array_length ((MonoArray*)start);
5340 skip_size += sizeof (MonoArray);
5341 skip_size += (ALLOC_ALIGN - 1);
5342 skip_size &= ~(ALLOC_ALIGN - 1);
5343 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
5344 if (((MonoArray*)start)->bounds) {
5345 /* account for the bounds */
5346 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
5358 * Perform consistency check of the heap.
5360 * Assumes the world is stopped.
/*
 * Runs check_remsets_for_area () over the allocated part (data..next_data) of
 * the sections in section_list, logging the start and end of the check at
 * debug level 1. Sections whose role is MEMORY_ROLE_GEN0 are special-cased
 * (NOTE(review): presumably skipped, since only old sections are of interest
 * here - confirm).
 */
5363 check_consistency (void)
5365 GCMemSection *section;
5367 // Need to add more checks
5368 // FIXME: Create a general heap enumeration function and use that
5370 DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
5372 // Check that oldspace->newspace pointers are registered with the collector
5373 for (section = section_list; section; section = section->next) {
5374 if (section->role == MEMORY_ROLE_GEN0)
5376 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
5377 check_remsets_for_area (section->data, section->next_data);
5380 DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
5383 /* Check that the reference is valid */
/*
 * Per-reference check used by check_object (): asserts that safe_name ()
 * yields a non-NULL name for the object referenced from slot PTR.
 */
5385 #define HANDLE_PTR(ptr,obj) do { \
5387 g_assert (safe_name (*(ptr)) != NULL); \
5394 * Perform consistency check on an object. Currently we only check that the
5395 * reference fields are valid.
/*
 * Visits the reference slots of the object at START using the FOREACH macro
 * that matches its descriptor type, computes the object's size, and returns
 * the address just past the object (START + skip_size). An unrecognised
 * descriptor type triggers g_assert_not_reached ().
 */
5398 check_object (char *start)
5407 vt = (GCVTable*)LOAD_VTABLE (start);
5408 //type = vt->desc & 0x7;
/* The low three bits of the descriptor select the descriptor type. */
5411 switch (desc & 0x7) {
5412 case DESC_TYPE_STRING:
/* Strings: no reference slots are visited, only the size is needed. */
5413 STRING_SIZE (skip_size, start);
5414 return start + skip_size;
5415 case DESC_TYPE_RUN_LENGTH:
5416 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
5417 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
5418 g_assert (skip_size);
5419 return start + skip_size;
5420 case DESC_TYPE_ARRAY:
5421 case DESC_TYPE_VECTOR:
5422 OBJ_VECTOR_FOREACH_PTR (vt, start);
/* Size is the object size rounded up to a multiple of ALLOC_ALIGN. */
5423 skip_size = safe_object_get_size ((MonoObject*)start);
5424 skip_size += (ALLOC_ALIGN - 1);
5425 skip_size &= ~(ALLOC_ALIGN - 1);
5426 return start + skip_size;
5427 case DESC_TYPE_SMALL_BITMAP:
5428 OBJ_BITMAP_FOREACH_PTR (desc,start);
5429 OBJ_BITMAP_SIZE (skip_size, desc, start);
5430 return start + skip_size;
5431 case DESC_TYPE_LARGE_BITMAP:
5432 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
5433 skip_size = safe_object_get_size ((MonoObject*)start);
5434 skip_size += (ALLOC_ALIGN - 1);
5435 skip_size &= ~(ALLOC_ALIGN - 1);
5436 return start + skip_size;
5437 case DESC_TYPE_COMPLEX:
5438 OBJ_COMPLEX_FOREACH_PTR (vt, start);
5439 /* this is a complex object */
5440 skip_size = safe_object_get_size ((MonoObject*)start);
5441 skip_size += (ALLOC_ALIGN - 1);
5442 skip_size &= ~(ALLOC_ALIGN - 1);
5443 return start + skip_size;
5444 case DESC_TYPE_COMPLEX_ARR:
5445 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
5446 /* this is an array of complex structs */
5447 skip_size = safe_object_get_size ((MonoObject*)start);
5448 skip_size += (ALLOC_ALIGN - 1);
5449 skip_size &= ~(ALLOC_ALIGN - 1);
5450 return start + skip_size;
5452 g_assert_not_reached ();
5457 * ######################################################################
5458 * ######## Other mono public interface functions.
5459 * ######################################################################
/*
 * Triggers a collection: generation 0 runs collect_nursery (0), the other
 * visible path runs major_collection ().
 */
5463 mono_gc_collect (int generation)
/* The address of the parameter is passed to update_current_thread_stack (); NOTE(review): presumably as a marker for the current stack position - confirm. */
5466 update_current_thread_stack (&generation);
5468 if (generation == 0) {
5469 collect_nursery (0);
5471 major_collection ();
/* NOTE(review): presumably reports the highest generation number accepted by mono_gc_collect () - confirm against the body. */
5478 mono_gc_max_generation (void)
/*
 * Returns how many collections have run so far: num_minor_gcs for
 * generation 0, num_major_gcs for any other value.
 */
5484 mono_gc_collection_count (int generation)
5486 if (generation == 0)
5487 return num_minor_gcs;
5488 return num_major_gcs;
/*
 * Estimates the memory in use: starts from los_memory_usage and adds the
 * filled part (next_data - data) of every section in section_list.
 * Pinned objects are not accounted for (see the FIXME).
 */
5492 mono_gc_get_used_size (void)
5495 GCMemSection *section;
5497 tot = los_memory_usage;
5498 for (section = section_list; section; section = section->next) {
5499 /* this is approximate... */
5500 tot += section->next_data - section->data;
5502 /* FIXME: account for pinned objects */
/* NOTE(review): presumably reports the total heap size, as opposed to the used size from mono_gc_get_used_size () - confirm against the body. */
5508 mono_gc_get_heap_size (void)
/* NOTE(review): presumably the counterpart of mono_gc_enable () that turns collections off - confirm against the body. */
5514 mono_gc_disable (void)
/* NOTE(review): presumably the counterpart of mono_gc_disable () that turns collections back on - confirm against the body. */
5522 mono_gc_enable (void)
/* NOTE(review): presumably tells whether the object O is still considered live by the collector - confirm against the body. */
5530 mono_object_is_alive (MonoObject* o)
/* Reports the generation of OBJ; the decision is based on whether OBJ lies in the nursery (ptr_in_nursery ()). */
5536 mono_gc_get_generation (MonoObject *obj)
5538 if (ptr_in_nursery (obj))
/* NOTE(review): presumably switches on GC event notifications - confirm against the body. */
5544 mono_gc_enable_events (void)
/* Registers LINK_ADDR as a disappearing link to OBJ, forwarding TRACK unchanged to mono_gc_register_disappearing_link (). */
5549 mono_gc_weak_link_add (void **link_addr, MonoObject *obj, gboolean track)
5551 mono_gc_register_disappearing_link (obj, link_addr, track);
/* Removes the weak link at LINK_ADDR by re-registering it with a NULL object and tracking disabled. */
5555 mono_gc_weak_link_remove (void **link_addr)
5557 mono_gc_register_disappearing_link (NULL, link_addr, FALSE);
/* Returns the object referenced by the weak link at LINK_ADDR; the stored value is decoded with REVEAL_POINTER (). */
5561 mono_gc_weak_link_get (void **link_addr)
5565 return (MonoObject*) REVEAL_POINTER (*link_addr);
/*
 * Builds a root descriptor from BITMAP (NUMBITS significant bits).
 * When NUMBITS fits in the bits of one bitmap word that remain after
 * ROOT_DESC_TYPE_SHIFT, the first bitmap word is packed inline as a
 * ROOT_DESC_BITMAP descriptor. Otherwise a complex descriptor is allocated
 * (with numbits + 1) and referenced from a ROOT_DESC_COMPLEX descriptor.
 */
5569 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
5571 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
5572 return (void*)MAKE_ROOT_DESC (ROOT_DESC_BITMAP, bitmap [0]);
5574 mword complex = alloc_complex_descriptor (bitmap, numbits + 1);
5575 return (void*)MAKE_ROOT_DESC (ROOT_DESC_COMPLEX, complex);
/*
 * Creates a ROOT_DESC_USER root descriptor for the mark function MARKER.
 * The descriptor encodes the index of the next free slot in user_descriptors,
 * and MARKER is then stored in that slot. Asserts that fewer than
 * MAX_USER_DESCRIPTORS slots are already in use.
 */
5580 mono_gc_make_root_descr_user (MonoGCMarkFunc marker)
5584 g_assert (user_descriptors_next < MAX_USER_DESCRIPTORS);
5585 descr = (void*)MAKE_ROOT_DESC (ROOT_DESC_USER, (mword)user_descriptors_next);
5586 user_descriptors [user_descriptors_next ++] = marker;
/*
 * Allocates SIZE bytes of zeroed memory with calloc () and registers the
 * block as a GC root described by DESCR. A failed root registration is
 * handled separately.
 */
5592 mono_gc_alloc_fixed (size_t size, void *descr)
5594 /* FIXME: do a single allocation */
5595 void *res = calloc (1, size);
5598 if (!mono_gc_register_root (res, size, descr)) {
/* Counterpart of mono_gc_alloc_fixed (): removes ADDR from the set of registered GC roots. */
5606 mono_gc_free_fixed (void* addr)
5608 mono_gc_deregister_root (addr);
/* Tells whether the calling thread is known to the collector, i.e. whether thread_info_lookup () finds an entry for it. */
5613 mono_gc_is_gc_thread (void)
5617 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
/*
 * One-time collector initialisation:
 *  - reads the page size and parses the MONO_GC_DEBUG environment variable,
 *  - sets up the suspend semaphore and the suspend/restart signal handlers,
 *  - allocates the global remembered set,
 *  - creates the pthread key whose destructor is unregister_thread,
 *  - registers the calling thread with the collector.
 * The gc_initialized flag guards against repeated initialisation.
 */
5623 mono_gc_base_init (void)
5627 struct sigaction sinfo;
5629 LOCK_INIT (gc_mutex);
5631 if (gc_initialized) {
5635 pagesize = mono_pagesize ();
/* Debug output goes to stderr unless MONO_GC_DEBUG names a file. */
5636 gc_debug_file = stderr;
5637 if ((env = getenv ("MONO_GC_DEBUG"))) {
/* The variable is a comma-separated list of options. */
5638 opts = g_strsplit (env, ",", -1);
5639 for (ptr = opts; ptr && *ptr; ptr ++) {
/* An option starting with a digit sets the debug level. */
5641 if (opt [0] >= '0' && opt [0] <= '9') {
5642 gc_debug_level = atoi (opt);
/* Debug file name: the process id is appended so that each process gets its own file. */
5647 char *rf = g_strdup_printf ("%s.%d", opt, getpid ());
5648 gc_debug_file = fopen (rf, "wb");
5650 gc_debug_file = stderr;
5653 } else if (!strcmp (opt, "collect-before-allocs")) {
5654 collect_before_allocs = TRUE;
5655 } else if (!strcmp (opt, "check-at-minor-collections")) {
5656 consistency_check_at_minor_collection = TRUE;
5657 } else if (!strcmp (opt, "clear-at-gc")) {
5658 nursery_clear_policy = CLEAR_AT_GC;
5660 fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env);
5661 fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|<option>]+ where l is a debug level 0-9.\n");
5662 fprintf (stderr, "Valid options are: collect-before-allocs, check-at-minor-collections, clear-at-gc.\n");
5669 sem_init (&suspend_ack_semaphore, 0, 0);
/* Both handlers run with every signal blocked (full sa_mask). */
5671 sigfillset (&sinfo.sa_mask);
5672 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
5673 sinfo.sa_sigaction = suspend_handler;
5674 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
5675 g_error ("failed sigaction");
/* The same sigaction structure is reused for the restart signal, so SA_SIGINFO is still set in sa_flags here. */
5678 sinfo.sa_handler = restart_handler;
5679 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
5680 g_error ("failed sigaction");
/* suspend_signal_mask: every signal except the restart signal. */
5683 sigfillset (&suspend_signal_mask);
5684 sigdelset (&suspend_signal_mask, restart_signal_num);
5686 global_remset = alloc_remset (1024, NULL);
5687 global_remset->next = NULL;
5689 pthread_key_create (&remembered_set_key, unregister_thread);
5690 gc_initialized = TRUE;
/* The address of the local sinfo is what gets passed to mono_gc_register_thread (); NOTE(review): presumably used as a stack address for the current thread - confirm. */
5692 mono_gc_register_thread (&sinfo);
5700 /* FIXME: Do this in the JIT, where specialized allocation sequences can be created
5701 * for each class. This is currently not easy to do, as it is hard to generate basic
5702 * blocks + branches, but it is easy with the linear IL codebase.
/*
 * Builds the managed "Alloc" wrapper method (MONO_WRAPPER_ALLOC) that
 * implements bump-pointer allocation in IL. The generated method takes a
 * vtable pointer, computes the aligned instance size, advances tlab_next and,
 * if the new value is still below tlab_temp_end, stores the vtable into the
 * new object and returns it. Otherwise it calls mono_gc_alloc_obj ().
 * Only ATYPE_NORMAL is supported (asserted).
 */
5705 create_allocator (int atype)
5707 int tlab_next_addr_offset = -1;
5708 int tlab_temp_end_offset = -1;
5709 int p_var, size_var, tlab_next_addr_var, new_next_var;
5710 guint32 slowpath_branch;
5711 MonoMethodBuilder *mb;
5713 MonoMethodSignature *csig;
5714 static gboolean registered = FALSE;
/* Both thread-local variables must have a known TLS offset for the IL below to work. */
5716 MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
5717 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5719 g_assert (tlab_next_addr_offset != -1);
5720 g_assert (tlab_temp_end_offset != -1);
5722 g_assert (atype == ATYPE_NORMAL);
5725 mono_register_jit_icall (mono_gc_alloc_obj, "mono_gc_alloc_obj", mono_create_icall_signature ("object ptr int"), FALSE);
/* Signature of the generated method: object Alloc (native int vtable). */
5729 csig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
5730 csig->ret = &mono_defaults.object_class->byval_arg;
5731 csig->params [0] = &mono_defaults.int_class->byval_arg;
5733 mb = mono_mb_new (mono_defaults.object_class, "Alloc", MONO_WRAPPER_ALLOC);
5734 size_var = mono_mb_add_local (mb, &mono_defaults.int32_class->byval_arg);
5735 /* size = vtable->klass->instance_size; */
5736 mono_mb_emit_ldarg (mb, 0);
5737 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoVTable, klass));
5738 mono_mb_emit_byte (mb, CEE_ADD);
5739 mono_mb_emit_byte (mb, CEE_LDIND_I);
5740 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoClass, instance_size));
5741 mono_mb_emit_byte (mb, CEE_ADD);
5742 /* FIXME: assert instance_size stays a 4 byte integer */
5743 mono_mb_emit_byte (mb, CEE_LDIND_U4);
5744 mono_mb_emit_stloc (mb, size_var);
5746 /* size += ALLOC_ALIGN - 1; */
5747 mono_mb_emit_ldloc (mb, size_var);
5748 mono_mb_emit_icon (mb, ALLOC_ALIGN - 1);
5749 mono_mb_emit_byte (mb, CEE_ADD);
5750 /* size &= ~(ALLOC_ALIGN - 1); */
5751 mono_mb_emit_icon (mb, ~(ALLOC_ALIGN - 1));
5752 mono_mb_emit_byte (mb, CEE_AND);
5753 mono_mb_emit_stloc (mb, size_var);
5756 * We need to modify tlab_next, but the JIT only supports reading, so we read
5757 * another tls var holding its address instead.
5760 /* tlab_next_addr (local) = tlab_next_addr (TLS var) */
5761 tlab_next_addr_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5762 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5763 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5764 mono_mb_emit_i4 (mb, tlab_next_addr_offset);
5765 mono_mb_emit_stloc (mb, tlab_next_addr_var);
5767 /* p = (void**)tlab_next; */
5768 p_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5769 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5770 mono_mb_emit_byte (mb, CEE_LDIND_I);
5771 mono_mb_emit_stloc (mb, p_var);
5773 /* new_next = (char*)p + size; */
5774 new_next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5775 mono_mb_emit_ldloc (mb, p_var);
5776 mono_mb_emit_ldloc (mb, size_var);
5777 mono_mb_emit_byte (mb, CEE_CONV_I);
5778 mono_mb_emit_byte (mb, CEE_ADD);
5779 mono_mb_emit_stloc (mb, new_next_var);
5781 /* tlab_next = new_next */
5782 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5783 mono_mb_emit_ldloc (mb, new_next_var);
5784 mono_mb_emit_byte (mb, CEE_STIND_I);
5786 /* if (G_LIKELY (new_next < tlab_temp_end)) */
/*
 * Despite its name, slowpath_branch is the branch taken when the allocation
 * fits (unsigned new_next < tlab_temp_end): it is patched to land on the
 * fast-path code further down, while the slow path is the fall-through.
 */
5787 mono_mb_emit_ldloc (mb, new_next_var);
5788 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5789 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5790 mono_mb_emit_i4 (mb, tlab_temp_end_offset);
5791 slowpath_branch = mono_mb_emit_short_branch (mb, MONO_CEE_BLT_UN_S);
/* Fall-through (slow path): call mono_gc_alloc_obj (vtable, size) and return its result. */
5795 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5796 mono_mb_emit_byte (mb, CEE_MONO_NOT_TAKEN);
5798 /* FIXME: mono_gc_alloc_obj takes a 'size_t' as an argument, not an int32 */
5799 mono_mb_emit_ldarg (mb, 0);
5800 mono_mb_emit_ldloc (mb, size_var);
5801 mono_mb_emit_icall (mb, mono_gc_alloc_obj);
5802 mono_mb_emit_byte (mb, CEE_RET);
/* Branch target (fast path). */
5805 mono_mb_patch_short_branch (mb, slowpath_branch);
5807 /* FIXME: Memory barrier */
/* *p = vtable: the first word of the new object is its vtable pointer. */
5810 mono_mb_emit_ldloc (mb, p_var);
5811 mono_mb_emit_ldarg (mb, 0);
5812 mono_mb_emit_byte (mb, CEE_STIND_I);
/* return p; */
5815 mono_mb_emit_ldloc (mb, p_var);
5816 mono_mb_emit_byte (mb, CEE_RET);
5818 res = mono_mb_create_method (mb, csig, 8);
/* The generated method does not ask for zero-initialised locals. */
5820 mono_method_get_header (res)->init_locals = FALSE;
/* Cache of generated allocator methods, one slot per allocator type; filled lazily by mono_gc_get_managed_allocator_by_type (). */
5824 static MonoMethod* alloc_method_cache [ATYPE_NUM];
5827 * Generate an allocator method implementing the fast path of mono_gc_alloc_obj ().
5828 * The signature of the called method is:
5829 * object allocate (MonoVTable *vtable)
/*
 * Decides whether instances of VTABLE's class can be allocated by the managed
 * fast-path allocator and, if so, returns the allocator of type 0.
 * The checks below cover the cases that are treated specially: missing TLS
 * offsets, instances larger than tlab_size, classes with finalizers or
 * marshal-by-ref semantics, allocation profiling, strings, and the
 * collect-before-allocs debug mode.
 * NOTE(review): each check presumably makes the function give up and return
 * NULL - confirm against the full body.
 */
5832 mono_gc_get_managed_allocator (MonoVTable *vtable, gboolean for_box)
5834 int tlab_next_offset = -1;
5835 int tlab_temp_end_offset = -1;
5836 MonoClass *klass = vtable->klass;
5837 MONO_THREAD_VAR_OFFSET (tlab_next, tlab_next_offset);
5838 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5840 if (tlab_next_offset == -1 || tlab_temp_end_offset == -1)
5842 if (klass->instance_size > tlab_size)
5844 if (klass->has_finalize || klass->marshalbyref || (mono_profiler_get_events () & MONO_PROFILE_ALLOCATIONS))
5848 if (klass->byval_arg.type == MONO_TYPE_STRING)
5850 if (collect_before_allocs)
5853 return mono_gc_get_managed_allocator_by_type (0);
/* NOTE(review): presumably maps a generated allocator method back to its allocator type index - confirm against the body. */
5857 mono_gc_get_managed_allocator_type (MonoMethod *managed_alloc)
/*
 * Returns the managed allocator for ATYPE from alloc_method_cache, creating
 * and caching it with create_allocator () when needed. The cache access is
 * done while holding the loader lock.
 */
5863 mono_gc_get_managed_allocator_by_type (int atype)
5867 mono_loader_lock ();
5868 res = alloc_method_cache [atype];
5870 res = alloc_method_cache [atype] = create_allocator (atype);
5871 mono_loader_unlock ();
/* NOTE(review): presumably reports how many allocator types exist (cf. ATYPE_NUM) - confirm against the body. */
5876 mono_gc_get_managed_allocator_types (void)
/* Cached managed write-barrier method; set once by mono_gc_get_write_barrier () under the loader lock. */
5881 static MonoMethod *write_barrier_method;
/*
 * Returns the managed write-barrier method, building it on first use.
 * The generated method takes (native int ptr, object value) and:
 *  - stores VALUE directly when PTR itself is inside the nursery,
 *  - otherwise appends PTR to the thread's remembered set when there is room
 *    and then performs the store,
 *  - otherwise falls back to the mono_gc_wbarrier_generic_store () icall.
 * Several threads may build the method concurrently; the winner is picked
 * under the loader lock and the other copies are freed.
 */
5884 mono_gc_get_write_barrier (void)
5887 int remset_offset = -1;
5888 int remset_var, next_var;
5889 MonoMethodBuilder *mb;
5890 MonoMethodSignature *sig;
5893 MONO_THREAD_VAR_OFFSET (remembered_set, remset_offset);
5895 // FIXME: Maybe create a separate version for ctors (the branch would be
5896 // correctly predicted more times)
5897 if (write_barrier_method)
5898 return write_barrier_method;
5900 /* Create the IL version of mono_gc_wbarrier_generic_store () */
5901 sig = mono_metadata_signature_alloc (mono_defaults.corlib, 2);
5902 sig->ret = &mono_defaults.void_class->byval_arg;
5903 sig->params [0] = &mono_defaults.int_class->byval_arg;
5904 sig->params [1] = &mono_defaults.object_class->byval_arg;
5906 mb = mono_mb_new (mono_defaults.object_class, "wbarrier", MONO_WRAPPER_WRITE_BARRIER);
5908 /* ptr_in_nursery () check */
5909 #ifdef ALIGN_NURSERY
5911 * Masking out the bits might be faster, but we would have to use 64 bit
5912 * immediates, which might be slower.
/* Compare the high bits of ptr with those of nursery_start; branch away when they differ. */
5914 mono_mb_emit_ldarg (mb, 0);
5915 mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS);
5916 mono_mb_emit_byte (mb, CEE_SHR_UN);
5917 mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS);
5918 label1 = mono_mb_emit_branch (mb, CEE_BNE_UN);
5921 g_assert_not_reached ();
5924 /* Don't need write barrier case */
5925 /* do the assignment */
5926 mono_mb_emit_ldarg (mb, 0);
5927 mono_mb_emit_ldarg (mb, 1);
5928 /* Don't use STIND_REF, as it would cause infinite recursion */
5929 mono_mb_emit_byte (mb, CEE_STIND_I);
5930 mono_mb_emit_byte (mb, CEE_RET);
5932 /* Need write barrier case */
5933 mono_mb_patch_branch (mb, label1);
5935 if (remset_offset == -1)
5937 g_assert_not_reached ();
5939 // remset_var = remembered_set;
5940 remset_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5941 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5942 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5943 mono_mb_emit_i4 (mb, remset_offset);
5944 mono_mb_emit_stloc (mb, remset_var);
5946 // next_var = rs->store_next
5947 next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5948 mono_mb_emit_ldloc (mb, remset_var);
5949 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, store_next));
5950 mono_mb_emit_byte (mb, CEE_LDIND_I);
5951 mono_mb_emit_stloc (mb, next_var);
5953 // if (rs->store_next < rs->end_set) {
/*
 * Both operands are addresses, so the comparison has to be unsigned
 * (bge.un): a signed compare gives the wrong answer when the two pointers
 * lie on different sides of the sign boundary.
 */
5954 mono_mb_emit_ldloc (mb, next_var);
5955 mono_mb_emit_ldloc (mb, remset_var);
5956 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, end_set));
5957 mono_mb_emit_byte (mb, CEE_LDIND_I);
5958 label2 = mono_mb_emit_branch (mb, CEE_BGE_UN);
5960 /* write barrier fast path */
5961 // *(rs->store_next++) = (mword)ptr;
5962 mono_mb_emit_ldloc (mb, next_var);
5963 mono_mb_emit_ldarg (mb, 0);
5964 mono_mb_emit_byte (mb, CEE_STIND_I);
/* next_var += sizeof (gpointer), then write it back to rs->store_next. */
5966 mono_mb_emit_ldloc (mb, next_var);
5967 mono_mb_emit_icon (mb, sizeof (gpointer));
5968 mono_mb_emit_byte (mb, CEE_ADD);
5969 mono_mb_emit_stloc (mb, next_var);
5971 mono_mb_emit_ldloc (mb, remset_var);
5972 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, store_next));
5973 mono_mb_emit_ldloc (mb, next_var);
5974 mono_mb_emit_byte (mb, CEE_STIND_I);
5976 // *(void**)ptr = value;
5977 mono_mb_emit_ldarg (mb, 0);
5978 mono_mb_emit_ldarg (mb, 1);
5979 mono_mb_emit_byte (mb, CEE_STIND_I);
5980 mono_mb_emit_byte (mb, CEE_RET);
5982 /* write barrier slow path */
5983 mono_mb_patch_branch (mb, label2);
5985 mono_mb_emit_ldarg (mb, 0);
5986 mono_mb_emit_ldarg (mb, 1);
5987 mono_mb_emit_icall (mb, mono_gc_wbarrier_generic_store);
5988 mono_mb_emit_byte (mb, CEE_RET);
5990 res = mono_mb_create_method (mb, sig, 16);
/* Publish the method; if another thread got there first, discard this copy. */
5993 mono_loader_lock ();
5994 if (write_barrier_method) {
5995 /* Already created */
5996 mono_free_method (res);
5998 /* double-checked locking */
5999 mono_memory_barrier ();
6000 write_barrier_method = res;
6002 mono_loader_unlock ();
6004 return write_barrier_method;
6007 #endif /* HAVE_SGEN_GC */