2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright 2005-2009 Novell, Inc (http://www.novell.com)
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation always provides zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issue more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list and critical_fin_list are part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "metadata/method-builder.h"
148 #include "metadata/profiler-private.h"
149 #include "utils/mono-mmap.h"
151 #ifdef HAVE_VALGRIND_MEMCHECK_H
152 #include <valgrind/memcheck.h>
155 #define OPDEF(a,b,c,d,e,f,g,h,i,j) \
159 #include "mono/cil/opcode.def"
166 * ######################################################################
167 * ######## Types and constants used by the GC.
168 * ######################################################################
170 #if SIZEOF_VOID_P == 4
171 typedef guint32 mword;
173 typedef guint64 mword;
176 static int gc_initialized = 0;
177 static int gc_debug_level = 0;
178 static FILE* gc_debug_file;
179 /* If set, do a minor collection before every allocation */
180 static gboolean collect_before_allocs = FALSE;
181 /* If set, do a heap consistency check before each minor collection */
182 static gboolean consistency_check_at_minor_collection = FALSE;
/* Flush any output still buffered on gc_debug_file, the stream the DEBUG () messages are written to. */
186 mono_gc_flush_info (void)
188 fflush (gc_debug_file);
/* Compile-time ceiling: DEBUG () statements with a level above this are never executed. */
192 #define MAX_DEBUG_LEVEL 8
/* Execute statement a only when level is within both MAX_DEBUG_LEVEL and the runtime gc_debug_level. */
193 #define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0)
/* Timing helpers built on struct timeval and gettimeofday (). */
195 #define TV_DECLARE(name) struct timeval name
196 #define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
/*
 * Difference end - start in microseconds, converted to int.
 * NOTE(review): the tv_usec operands use end/start without parentheses, unlike
 * the tv_sec operands, so only plain variable names are safe as arguments.
 */
197 #define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec)
/* Number of bits in an mword. */
199 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
209 /* each request from the OS ends up in a GCMemSection */
210 typedef struct _GCMemSection GCMemSection;
211 struct _GCMemSection {
215 /* pointer where more data could be allocated if it fits */
219 * scan starts is an array of pointers to objects equally spaced in the allocation area
220 * They let us quickly find pinned objects from pinning pointers.
223 /* in major collections indexes in the pin_queue for objects that pin this section */
226 unsigned short num_scan_start;
230 /* large object space struct: 64+ KB */
231 /* we could make this limit much smaller to avoid memcpy copy
232 * and potentially have more room in the GC descriptor: need to measure
233 * This also means that such small OS objects will need to be
234 * allocated in a different way (using pinned chunks).
235 * We may want to put large but smaller than 64k objects in the fixed space
236 * when we move the object from one generation to another (to limit the
237 * pig in the snake effect).
238 * Note: it may be worth to have an optimized copy function, since we can
239 * assume that objects are aligned and have a multiple of 8 size.
240 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
241 * true if MONO_ZERO_LEN_ARRAY is nonzero.
243 typedef struct _LOSObject LOSObject;
246 mword size; /* this is the object size */
247 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
250 char data [MONO_ZERO_LEN_ARRAY];
253 /* Pinned objects are allocated in the LOS space if bigger than half a page
254 * or from freelists otherwise. We assume that pinned objects are relatively few
255 * and they have a slow dying speed (like interned strings, thread objects).
256 * As such they will be collected only at major collections.
257 * free lists are not global: when we need memory we allocate a PinnedChunk.
258 * Each pinned chunk is made of several pages, the first of which is used
259 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
260 * includes the freelists vectors and info about the object size of each page
261 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
262 * a size is assigned to it, the page is divided in the proper chunks and each
263 * chunk is added to the freelist. To not waste space, the remaining space in the
264 * first page is used as objects of size 16 or 32 (need to measure which are more
266 * We use this same structure to allocate memory used internally by the GC, so
267 * we never use malloc/free if we need to alloc during collection: the world is stopped
268 * and malloc/free will deadlock.
269 * When we want to iterate over pinned objects, we just scan a page at a time
270 * linearly according to the size of objects in the page: the next pointer used to link
271 * the items in the freelist uses the same word as the vtable. Since we keep freelists
272 * for each pinned chunk, if the word points outside the pinned chunk it means
274 * We could avoid this expensive scanning in creative ways. We could have a policy
275 * of putting in the pinned space only objects we know about that have no struct fields
276 * with references and we can easily use even an expensive write barrier for them,
277 * since pointer writes on such objects should be rare.
278 * The best compromise is to just alloc interned strings and System.MonoType in them.
279 * It would be nice to allocate MonoThread in it, too: must check that we properly
280 * use write barriers so we don't have to do any expensive scanning of the whole pinned
281 * chunk list during minor collections. We can avoid it now because we alloc in it only
282 * reference-free objects.
284 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
285 #define MAX_FREELIST_SIZE 2048
286 #define PINNED_PAGE_SIZE (4096)
287 #define PINNED_CHUNK_MIN_SIZE (4096*8)
288 typedef struct _PinnedChunk PinnedChunk;
289 struct _PinnedChunk {
292 int *page_sizes; /* a 0 means the page is still unused */
295 void *data [1]; /* page sizes and free lists are stored here */
298 /* The method used to clear the nursery */
299 /* Clearing at nursery collections is the safest, but has bad interactions with caches.
300 * Clearing at TLAB creation is much faster, but more complex and it might expose hard
305 CLEAR_AT_TLAB_CREATION
306 } NurseryClearPolicy;
308 static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
311 * If this is set, the nursery is aligned to an address aligned to its size, ie.
312 * a 1MB nursery will be aligned to an address divisible by 1MB. This allows us to
313 * speed up ptr_in_nursery () checks which are very frequent. This requires the
314 * nursery size to be a compile time constant.
316 #define ALIGN_NURSERY 1
319 * The young generation is divided into fragments. This is because
320 * we can hand one fragment to a thread for lock-less fast alloc and
321 * because the young generation ends up fragmented anyway by pinned objects.
322 * Once a collection is done, a list of fragments is created. When doing
323 * thread local alloc we use smallish nurseries so we allow new threads to
324 * allocate memory from gen0 without triggering a collection. Threads that
325 * are found to allocate lots of memory are given bigger fragments. This
326 * should make the finalizer thread use little nursery memory after a while.
327 * We should start assigning threads very small fragments: if there are many
328 * threads the nursery will be full of reserved space that the threads may not
329 * use at all, slowing down allocation speed.
330 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
331 * Allocation Buffers (TLABs).
333 typedef struct _Fragment Fragment;
337 char *fragment_start;
338 char *fragment_limit; /* the current soft limit for allocation */
342 /* the runtime can register areas of memory as roots: we keep two lists of roots,
343 * a pinned root set for conservatively scanned roots and a normal one for
344 * precisely scanned roots (currently implemented as a single list).
346 typedef struct _RootRecord RootRecord;
354 /* for use with write barriers */
355 typedef struct _RememberedSet RememberedSet;
356 struct _RememberedSet {
360 mword data [MONO_ZERO_LEN_ARRAY];
363 /* we have 4 possible values in the low 2 bits */
365 REMSET_LOCATION, /* just a pointer to the exact location */
366 REMSET_RANGE, /* range of pointer fields */
367 REMSET_OBJECT, /* mark all the object for scanning */
368 REMSET_OTHER, /* all others */
369 REMSET_TYPE_MASK = 0x3
372 /* Subtypes of REMSET_OTHER */
374 REMSET_VTYPE, /* a valuetype described by a gc descriptor */
375 REMSET_ROOT_LOCATION, /* a location inside a root */
378 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
379 static pthread_key_t remembered_set_key;
380 static RememberedSet *global_remset;
381 //static int store_to_global_remset = 0;
383 /* FIXME: later choose a size that takes into account the RememberedSet struct
384 * and doesn't waste any alloc padding space.
386 #define DEFAULT_REMSET_SIZE 1024
387 static RememberedSet* alloc_remset (int size, gpointer id);
389 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
390 * no cast from a pointer to an integer
397 /* these bits are set in the object vtable: we could merge them since an object can be
398 * either pinned or forwarded but not both.
399 * We store them in the vtable slot because the bits are used in the sync block for
400 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
401 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
402 * would be an invalid combination for the monitor and hash code).
403 * The values are already shifted.
404 * The forwarding address is stored in the sync block.
406 #define FORWARDED_BIT 1
408 #define VTABLE_BITS_MASK 0x3
410 /* returns NULL if not forwarded, or the forwarded address */
411 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
412 /* set the forwarded address fw_addr for object obj */
413 #define forward_object(obj,fw_addr) do { \
414 ((mword*)(obj))[0] |= FORWARDED_BIT; \
415 ((mword*)(obj))[1] = (mword)(fw_addr); \
418 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
419 #define pin_object(obj) do { \
420 ((mword*)(obj))[0] |= PINNED_BIT; \
422 #define unpin_object(obj) do { \
423 ((mword*)(obj))[0] &= ~PINNED_BIT; \
427 #define ptr_in_nursery(ptr) (((mword)(ptr) & ~((1 << DEFAULT_NURSERY_BITS) - 1)) == (mword)nursery_start)
429 #define ptr_in_nursery(ptr) ((char*)(ptr) >= nursery_start && (char*)(ptr) < nursery_real_end)
433 * Since we set bits in the vtable, use the macro to load it from the pointer to
434 * an object that is potentially pinned.
436 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
/*
 * Return the name of the class of obj (vt->klass->name).
 * The vtable word is read through LOAD_VTABLE, which masks off the low tag
 * bits (VTABLE_BITS_MASK) that the GC may have set in it.
 */
439 safe_name (void* obj)
441 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
442 return vt->klass->name;
/*
 * Compute the size in bytes of object o from its class; the vtable is read
 * through LOAD_VTABLE so tag bits set in the vtable word are ignored.
 *  - strings: sizeof (MonoString) + 2 bytes per character + 2 further bytes
 *    (presumably the terminating character - TODO confirm);
 *  - arrays (klass->rank != 0): sizeof (MonoArray) + element size * length,
 *    with sizeof (MonoArrayBounds) * rank added when array->bounds is set;
 *  - everything else: klass->instance_size.
 */
446 safe_object_get_size (MonoObject* o)
448 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
449 if (klass == mono_defaults.string_class) {
450 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
451 } else if (klass->rank) {
452 MonoArray *array = (MonoArray*)o;
453 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
454 if (G_UNLIKELY (array->bounds)) {
457 size += sizeof (MonoArrayBounds) * klass->rank;
461 /* from a created object: the class must be inited already */
462 return klass->instance_size;
/*
 * Test whether o is a string whose length is 0 or an array (klass->rank != 0)
 * whose length is 0.
 * NOTE(review): judging by the name, a zero length is treated as a hint that
 * the length field may not have been stored yet - confirm against the
 * allocation paths.
 */
466 static inline gboolean
467 is_maybe_half_constructed (MonoObject *o)
471 klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
472 if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) ||
473 (klass->rank && mono_array_length ((MonoArray*)o) == 0))
480 * ######################################################################
481 * ######## Global data.
482 * ######################################################################
484 static LOCK_DECLARE (gc_mutex);
485 static int gc_disabled = 0;
486 static int num_minor_gcs = 0;
487 static int num_major_gcs = 0;
489 /* good sizes are 512KB-1MB: larger ones increase memzeroing time a lot */
490 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
491 #define DEFAULT_NURSERY_SIZE (1024*512*2)
492 /* The number of trailing 0 bits in DEFAULT_NURSERY_SIZE */
493 #define DEFAULT_NURSERY_BITS 20
494 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
495 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
496 /* to quickly find the head of an object pinned by a conservative address
497 * we keep track of the objects allocated for each SCAN_START_SIZE memory
498 * chunk in the nursery or other memory sections. Larger values have less
499 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
501 #define SCAN_START_SIZE (4096*2)
502 /* the minimum size of a fragment that we consider useful for allocation */
503 #define FRAGMENT_MIN_SIZE (512)
504 /* This is a fixed value used for pinned chunks, not the system pagesize */
505 #define FREELIST_PAGESIZE 4096
507 static mword pagesize = 4096;
508 static mword nursery_size = DEFAULT_NURSERY_SIZE;
509 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
510 static mword max_section_size = DEFAULT_MAX_SECTION;
511 static int section_size_used = 0;
512 static int degraded_mode = 0;
514 static LOSObject *los_object_list = NULL;
515 static mword los_memory_usage = 0;
516 static mword los_num_objects = 0;
517 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
518 static mword total_alloc = 0;
519 /* use this to tune when to do a major/minor collection */
520 static mword memory_pressure = 0;
522 static GCMemSection *section_list = NULL;
523 static GCMemSection *nursery_section = NULL;
524 static mword lowest_heap_address = ~(mword)0;
525 static mword highest_heap_address = 0;
527 typedef struct _FinalizeEntry FinalizeEntry;
528 struct _FinalizeEntry {
533 typedef struct _DisappearingLink DisappearingLink;
534 struct _DisappearingLink {
535 DisappearingLink *next;
/*
 * Pointer hiding: HIDE_POINTER yields the bitwise complement of a pointer
 * value. Complementing twice restores the original, which is why
 * REVEAL_POINTER is simply defined as HIDE_POINTER.
 * NOTE(review): the value is cast through gulong - confirm that gulong is
 * pointer-sized on every supported target.
 */
539 #define HIDE_POINTER(p) ((gpointer)(~(gulong)(p)))
540 #define REVEAL_POINTER(p) HIDE_POINTER ((p))
/* The revealed content of the location a DisappearingLink d points at (*d->link). */
542 #define DISLINK_OBJECT(d) (REVEAL_POINTER (*(d)->link))
545 * The finalizable hash has the object as the key, the
546 * disappearing_link hash, has the link address as key.
548 static FinalizeEntry **finalizable_hash = NULL;
549 /* objects that are ready to be finalized */
550 static FinalizeEntry *fin_ready_list = NULL;
551 static FinalizeEntry *critical_fin_list = NULL;
552 static DisappearingLink **disappearing_link_hash = NULL;
553 static mword disappearing_link_hash_size = 0;
554 static mword finalizable_hash_size = 0;
556 static int num_registered_finalizers = 0;
557 static int num_ready_finalizers = 0;
558 static int num_disappearing_links = 0;
559 static int no_finalize = 0;
561 /* keep each size a multiple of ALLOC_ALIGN */
562 /* on 64 bit systems 8 is likely completely unused. */
563 static const int freelist_sizes [] = {
564 8, 16, 24, 32, 40, 48, 64, 80,
565 96, 128, 160, 192, 224, 256, 320, 384,
566 448, 512, 584, 680, 816, 1024, 1360, 2048};
567 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
569 static char* max_pinned_chunk_addr = NULL;
570 static char* min_pinned_chunk_addr = (char*)-1;
571 /* pinned_chunk_list is used for allocations of objects that are never moved */
572 static PinnedChunk *pinned_chunk_list = NULL;
573 /* internal_chunk_list is used for allocating structures needed by the GC */
574 static PinnedChunk *internal_chunk_list = NULL;
/*
 * Walk pinned_chunk_list looking for a chunk that contains p, i.e. a chunk with
 * chunk->start_data <= p < (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE.
 * Only pinned_chunk_list is searched; internal_chunk_list is not consulted.
 */
577 obj_is_from_pinned_alloc (char *p)
579 PinnedChunk *chunk = pinned_chunk_list;
580 for (; chunk; chunk = chunk->next) {
581 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
588 ROOT_TYPE_NORMAL = 0, /* "normal" roots */
589 ROOT_TYPE_PINNED = 1, /* roots without a GC descriptor */
590 ROOT_TYPE_WBARRIER = 2, /* roots with a write barrier */
594 /* registered roots: the key to the hash is the root start address */
596 * Different kinds of roots are kept separate to speed up pin_from_roots () for example.
598 static RootRecord **roots_hash [ROOT_TYPE_NUM] = { NULL, NULL };
599 static int roots_hash_size [ROOT_TYPE_NUM] = { 0, 0, 0 };
600 static mword roots_size = 0; /* amount of memory in the root set */
601 static int num_roots_entries [ROOT_TYPE_NUM] = { 0, 0, 0 };
604 * The current allocation cursors
605 * We allocate objects in the nursery.
606 * The nursery is the area between nursery_start and nursery_real_end.
607 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
608 * from nursery fragments.
609 * tlab_next is the pointer to the space inside the TLAB where the next object will
611 * tlab_temp_end is the pointer to the end of the temporary space reserved for
612 * the allocation: it allows us to set the scan starts at reasonable intervals.
613 * tlab_real_end points to the end of the TLAB.
614 * nursery_frag_real_end points to the end of the currently used nursery fragment.
615 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
616 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
617 * At the next allocation, the area of the nursery where objects can be present is
618 * between MIN(nursery_first_pinned_start, first_fragment_start) and
619 * MAX(nursery_last_pinned_end, nursery_frag_real_end)
621 static char *nursery_start = NULL;
624 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
625 * variables for next+temp_end ?
627 static __thread char *tlab_start;
628 static __thread char *tlab_next;
629 static __thread char *tlab_temp_end;
630 static __thread char *tlab_real_end;
631 /* Used by the managed allocator */
632 static __thread char **tlab_next_addr;
633 static char *nursery_next = NULL;
634 static char *nursery_frag_real_end = NULL;
635 static char *nursery_real_end = NULL;
636 //static char *nursery_first_pinned_start = NULL;
637 static char *nursery_last_pinned_end = NULL;
639 /* The size of a TLAB */
640 /* The bigger the value, the less often we have to go to the slow path to allocate a new
641 * one, but the more space is wasted by threads not allocating much memory.
643 * FIXME: Make this self-tuning for each thread.
645 static guint32 tlab_size = (1024 * 4);
647 /* fragments that are free and ready to be used for allocation */
648 static Fragment *nursery_fragments = NULL;
649 /* freelist of fragment structures */
650 static Fragment *fragment_freelist = NULL;
653 * used when moving the objects
654 * When the nursery is collected, objects are copied to to_space.
655 * The area between gray_first and gray_objects is used as a stack
656 * of objects that need their fields checked for more references
658 * We should optimize somehow this mechanism to avoid rescanning
659 * ptr-free objects. The order is also probably not optimal: need to
660 * test cache misses and other graph traversal orders.
662 static char *to_space = NULL;
663 static char *gray_first = NULL;
664 static char *gray_objects = NULL;
665 static char *to_space_end = NULL;
666 static GCMemSection *to_space_section = NULL;
668 /* objects bigger than this go into the large object space */
669 #define MAX_SMALL_OBJ_SIZE 0xffff
672 * ######################################################################
673 * ######## Macros and function declarations.
674 * ######################################################################
/*
 * Widen the recorded heap range so that it covers [low, high]:
 * lowest_heap_address only ever decreases and highest_heap_address only ever
 * increases.
 */
677 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
678 if ((mword)(low) < lowest_heap_address) \
679 lowest_heap_address = (mword)(low); \
680 if ((mword)(high) > highest_heap_address) \
681 highest_heap_address = (mword)(high); \
/*
 * Round the address in ptr up to the next multiple of sizeof (gpointer);
 * an address that is already aligned is left unchanged.
 */
685 align_pointer (void *ptr)
687 mword p = (mword)ptr;
688 p += sizeof (gpointer) - 1;
689 p &= ~ (sizeof (gpointer) - 1);
693 /* forward declarations */
694 static void* get_internal_mem (size_t size);
695 static void free_internal_mem (void *addr);
696 static void* get_os_memory (size_t size, int activate);
697 static void free_os_memory (void *addr, size_t size);
698 static G_GNUC_UNUSED void report_internal_mem_usage (void);
700 static int stop_world (void);
701 static int restart_world (void);
702 static void pin_thread_data (void *start_nursery, void *end_nursery);
703 static void scan_from_remsets (void *start_nursery, void *end_nursery);
704 static void find_pinning_ref_from_thread (char *obj, size_t size);
705 static void update_current_thread_stack (void *start);
706 static GCMemSection* alloc_section (size_t size);
707 static void finalize_in_range (char *start, char *end);
708 static void null_link_in_range (char *start, char *end);
709 static gboolean search_fragment_for_size (size_t size);
710 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
711 static void clear_remsets (void);
712 static void clear_tlabs (void);
713 static char *find_tlab_next_from_address (char *addr);
714 static void sweep_pinned_objects (void);
715 static void scan_from_pinned_objects (char *addr_start, char *addr_end);
716 static void free_large_object (LOSObject *obj);
717 static void free_mem_section (GCMemSection *section);
719 void describe_ptr (char *ptr);
720 void check_consistency (void);
721 char* check_object (char *start);
724 * ######################################################################
725 * ######## GC descriptors
726 * ######################################################################
727 * Used to quickly get the info the GC needs about an object: size and
728 * where the references are held.
730 /* objects are aligned to 8 bytes boundaries
731 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
732 * The low 3 bits define the type of the descriptor. The other bits
733 * depend on the type.
734 * As a general rule the 13 remaining low bits define the size, either
735 * of the whole object or of the elements in the arrays. While for objects
736 * the size is already in bytes, for arrays we need to shift, because
737 * array elements might be smaller than 8 bytes. In case of arrays, we
738 * use two bits to describe what the additional high bits represents,
739 * so the default behaviour can handle element sizes less than 2048 bytes.
740 * If the high 16 bits are 0, the object is pointer-free.
741 * This design should make it easy and fast to skip over ptr-free data.
742 * The first 4 types should cover >95% of the objects.
743 * Note that since the size of objects is limited to 64K, larger objects
744 * will be allocated in the large object heap.
745 * If we want 4-bytes alignment, we need to put vector and small bitmap
749 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
750 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
751 DESC_TYPE_STRING, /* nothing */
752 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
753 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
754 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
755 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
756 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
757 /* subtypes for arrays and vectors */
758 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
759 DESC_TYPE_V_REFS, /* all the array elements are refs */
760 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
761 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
764 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
765 #define LOW_TYPE_BITS 3
766 #define SMALL_BITMAP_SHIFT 16
767 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
768 #define VECTOR_INFO_SHIFT 14
769 #define VECTOR_ELSIZE_SHIFT 3
770 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
771 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
772 #define SMALL_SIZE_MASK 0xfff8
773 #define MAX_ELEMENT_SIZE 0x3ff
774 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
775 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
776 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
777 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
778 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
780 #define ALLOC_ALIGN 8
783 /* Root bitmap descriptors are simpler: the lower three bits describe the type
784 * and we either have 29/61 bitmap bits or nibble-based run-length,
785 * or a complex descriptor, or a user defined marker function.
788 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
793 ROOT_DESC_TYPE_MASK = 0x7,
794 ROOT_DESC_TYPE_SHIFT = 3,
797 #define MAKE_ROOT_DESC(type,val) ((type) | ((val) << ROOT_DESC_TYPE_SHIFT))
799 #define MAX_USER_DESCRIPTORS 16
801 static gsize* complex_descriptors = NULL;
802 static int complex_descriptors_size = 0;
803 static int complex_descriptors_next = 0;
804 static MonoGCMarkFunc user_descriptors [MAX_USER_DESCRIPTORS];
805 static int user_descriptors_next = 0;
/*
 * Record bitmap (numbits bits) in the complex_descriptors table.
 * An entry occupies nwords = numbits / GC_BITS_PER_WORD + 2 words: the first
 * word holds nwords itself and the following nwords - 1 words hold the bitmap.
 * Existing entries are walked first (each entry's first word gives the distance
 * to the next one) and compared word by word against bitmap.
 * A new entry is written at index res, the previous complex_descriptors_next;
 * the table is grown with g_realloc to twice its size plus nwords when needed.
 */
808 alloc_complex_descriptor (gsize *bitmap, int numbits)
810 int nwords = numbits/GC_BITS_PER_WORD + 2;
815 res = complex_descriptors_next;
816 /* linear search, so we don't have duplicates with domain load/unload
817 * this should not be performance critical or we'd have bigger issues
818 * (the number and size of complex descriptors should be small).
820 for (i = 0; i < complex_descriptors_next; ) {
821 if (complex_descriptors [i] == nwords) {
823 for (j = 0; j < nwords - 1; ++j) {
824 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
834 i += complex_descriptors [i];
836 if (complex_descriptors_next + nwords > complex_descriptors_size) {
837 int new_size = complex_descriptors_size * 2 + nwords;
838 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
839 complex_descriptors_size = new_size;
841 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
842 complex_descriptors_next += nwords;
843 complex_descriptors [res] = nwords;
844 for (i = 0; i < nwords - 1; ++i) {
845 complex_descriptors [res + 1 + i] = bitmap [i];
846 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
853 * Descriptor builders.
/*
 * mono_gc_make_descr_for_string:
 * Strings need no per-type layout information: the descriptor is just the
 * DESC_TYPE_STRING tag cast to a pointer. @bitmap and @numbits are not used
 * by the visible code.
 */
856 mono_gc_make_descr_for_string (gsize *bitmap, int numbits)
858 return (void*) DESC_TYPE_STRING;
/*
 * mono_gc_make_descr_for_object:
 * Build the GC descriptor for an object of @obj_size bytes whose reference
 * slots are flagged in @bitmap (@numbits valid bits).
 * The size is first rounded up to ALLOC_ALIGN. Objects up to
 * MAX_SMALL_OBJ_SIZE try a run-length descriptor (no references, or a single
 * contiguous run whose start and count are both below 256) and then a small
 * bitmap stored above SMALL_BITMAP_SHIFT with the header words shifted out.
 * Otherwise a large bitmap is used when the last set bit fits, and finally a
 * complex descriptor allocated with alloc_complex_descriptor ().
 * NOTE(review): this listing elides short lines, so the exact nesting of the
 * branches and the return statements are not visible here.
 * NOTE(review): first_set and num_set are int, so num_set << 24 overflows
 * int once num_set reaches 128 -- worth confirming this is harmless.
 */
862 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
864 int first_set = -1, num_set = 0, last_set = -1, i;
866 size_t stored_size = obj_size;
867 stored_size += ALLOC_ALIGN - 1;
868 stored_size &= ~(ALLOC_ALIGN - 1);
869 for (i = 0; i < numbits; ++i) {
870 if (bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
877 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
878 /* check run-length encoding first: one byte offset, one byte number of pointers
879 * on 64 bit archs, we can have 3 runs, just one on 32.
880 * It may be better to use nibbles.
883 desc = DESC_TYPE_RUN_LENGTH | stored_size;
884 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size));
886 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
887 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
888 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
891 /* we know the 2-word header is ptr-free */
892 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
893 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
894 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
898 /* we know the 2-word header is ptr-free */
899 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
900 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
901 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
904 /* it's a complex object ... */
905 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
909 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
911 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
913 int first_set = -1, num_set = 0, last_set = -1, i;
914 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
915 for (i = 0; i < numbits; ++i) {
916 if (elem_bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
923 if (elem_size <= MAX_ELEMENT_SIZE) {
924 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
926 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
928 /* Note: we also handle structs with just ref fields */
929 if (num_set * sizeof (gpointer) == elem_size) {
930 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16));
932 /* FIXME: try run-len first */
933 /* Note: we can't skip the object header here, because it's not present */
934 if (last_set <= SMALL_BITMAP_SIZE) {
935 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
938 /* it's am array of complex structs ... */
939 desc = DESC_TYPE_COMPLEX_ARR;
940 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
944 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
/*
 * STRING_SIZE: size of a string object, i.e. sizeof (MonoString) plus two
 * bytes per character and for one extra character, rounded up to ALLOC_ALIGN.
 * OBJ_RUN_LEN_SIZE / OBJ_BITMAP_SIZE: read the object size straight from the
 * descriptor (mask 0xfff8); @obj is not used by the visible lines.
 * PREFETCH: currently expands to nothing, the prefetchnta variant is
 * commented out.
 */
945 #define STRING_SIZE(size,str) do { \
946 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
947 (size) += (ALLOC_ALIGN - 1); \
948 (size) &= ~(ALLOC_ALIGN - 1); \
951 #define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \
952 (size) = (desc) & 0xfff8; \
955 #define OBJ_BITMAP_SIZE(size,desc,obj) do { \
956 (size) = (desc) & 0xfff8; \
959 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
960 #define PREFETCH(addr)
962 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
/*
 * OBJ_RUN_LEN_FOREACH_PTR: does nothing unless one of bits 16-31 of @desc is
 * set. Otherwise it walks a single run of pointer-sized slots in @obj and
 * invokes HANDLE_PTR on them: bits 16-23 of @desc give the index of the first
 * slot and bits 24-31 the number of slots.
 */
963 #define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \
964 if ((desc) & 0xffff0000) { \
965 /* there are pointers */ \
966 void **_objptr_end; \
967 void **_objptr = (void**)(obj); \
968 _objptr += ((desc) >> 16) & 0xff; \
969 _objptr_end = _objptr + (((desc) >> 24) & 0xff); \
970 while (_objptr < _objptr_end) { \
971 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_BITMAP_FOREACH_PTR: the reference bitmap is taken from bit 16 upwards
 * of @desc and applies to the slots that follow the OBJECT_HEADER_WORDS
 * header words of @obj; HANDLE_PTR is invoked on slots of that area.
 * NOTE(review): the loop that tests and shifts the bitmap is elided from
 * this listing.
 */
977 /* a bitmap desc means that there are pointer references or we'd have
978 * chosen run-length, instead: add an assert to check.
980 #define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \
981 /* there are pointers */ \
982 void **_objptr = (void**)(obj); \
983 gsize _bmap = (desc) >> 16; \
984 _objptr += OBJECT_HEADER_WORDS; \
987 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_LARGE_BITMAP_FOREACH_PTR: like the small bitmap walker, but the bitmap
 * is everything above the LOW_TYPE_BITS type tag of (vt)->desc. Scanning
 * starts after the OBJECT_HEADER_WORDS header words and HANDLE_PTR is invoked
 * when the low bit of the remaining bitmap is set.
 */
994 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
995 /* there are pointers */ \
996 void **_objptr = (void**)(obj); \
997 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
998 _objptr += OBJECT_HEADER_WORDS; \
1000 if ((_bmap & 1)) { \
1001 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_COMPLEX_FOREACH_PTR: (vt)->desc >> LOW_TYPE_BITS is an index into
 * complex_descriptors. The word found there is the record length, so
 * bwords = length - 1 bitmap words describe the object. Each bitmap word
 * covers GC_BITS_PER_WORD consecutive slots starting at start_run, and
 * HANDLE_PTR is invoked when the low bit of the current bitmap is set.
 * NOTE(review): the lines that step past the length word and shift the
 * bitmap are elided from this listing.
 */
1008 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
1009 /* there are pointers */ \
1010 void **_objptr = (void**)(obj); \
1011 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
1012 int bwords = (*bitmap_data) - 1; \
1013 void **start_run = _objptr; \
1016 MonoObject *myobj = (MonoObject*)obj; \
1017 g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1019 while (bwords-- > 0) { \
1020 gsize _bmap = *bitmap_data++; \
1021 _objptr = start_run; \
1022 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
1024 if ((_bmap & 1)) { \
1025 HANDLE_PTR (_objptr, (obj)); \
1030 start_run += GC_BITS_PER_WORD; \
/*
 * OBJ_COMPLEX_ARR_FOREACH_PTR: walks an array whose element layout is a
 * record in complex_descriptors (index (vt)->desc >> LOW_TYPE_BITS; the first
 * word is the record length and is skipped here). Elements start at the
 * vector field of MonoArray and are el_size bytes apart; for every element
 * the mbwords bitmap words are replayed, each covering GC_BITS_PER_WORD
 * slots, and HANDLE_PTR is invoked when the low bit of the current bitmap is
 * set.
 */
1034 /* this one is untested */
1035 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
1036 /* there are pointers */ \
1037 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
1038 int mbwords = (*mbitmap_data++) - 1; \
1039 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
1040 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1041 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1043 MonoObject *myobj = (MonoObject*)start; \
1044 g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1046 while (e_start < e_end) { \
1047 void **_objptr = (void**)e_start; \
1048 gsize *bitmap_data = mbitmap_data; \
1049 unsigned int bwords = mbwords; \
1050 while (bwords-- > 0) { \
1051 gsize _bmap = *bitmap_data++; \
1052 void **start_run = _objptr; \
1053 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
1055 if ((_bmap & 1)) { \
1056 HANDLE_PTR (_objptr, (obj)); \
1061 _objptr = start_run + GC_BITS_PER_WORD; \
1063 e_start += el_size; \
/*
 * OBJ_VECTOR_FOREACH_PTR: walks the elements of an array with an inline
 * vector descriptor. The element size is read from bits 3 and up of
 * (vt)->desc (masked with MAX_ELEMENT_SIZE) and the subtype from bits 14-15:
 *  - DESC_TYPE_V_REFS: HANDLE_PTR is applied to pointer slots of the whole
 *    element area.
 *  - DESC_TYPE_V_RUN_LEN: offset (bits 16-23) and count (bits 24-31) are read
 *    and num_refs slots are handled per element.
 *  - DESC_TYPE_V_BITMAP: the bitmap at bits 16 and up is replayed for every
 *    element, without skipping any header.
 * NOTE(review): in the run-length branch the line that applies `offset` to p
 * is not visible in this listing.
 */
1067 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
1068 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
1069 if ((vt)->desc & 0xffffc000) { \
1070 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
1071 /* there are pointers */ \
1072 int etype = (vt)->desc & 0xc000; \
1073 if (etype == (DESC_TYPE_V_REFS << 14)) { \
1074 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
1075 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
1076 /* Note: this code can handle also arrays of struct with only references in them */ \
1077 while (p < end_refs) { \
1078 HANDLE_PTR (p, (obj)); \
1081 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
1082 int offset = ((vt)->desc >> 16) & 0xff; \
1083 int num_refs = ((vt)->desc >> 24) & 0xff; \
1084 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1085 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1086 while (e_start < e_end) { \
1087 void **p = (void**)e_start; \
1090 for (i = 0; i < num_refs; ++i) { \
1091 HANDLE_PTR (p + i, (obj)); \
1093 e_start += el_size; \
1095 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
1096 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1097 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1098 while (e_start < e_end) { \
1099 void **p = (void**)e_start; \
1100 gsize _bmap = (vt)->desc >> 16; \
1101 /* Note: there is no object header here to skip */ \
1103 if ((_bmap & 1)) { \
1104 HANDLE_PTR (p, (obj)); \
1109 e_start += el_size; \
/*
 * Statistics-only HANDLE_PTR: counts every slot visited in
 * obj_references_checked and, separately in new_obj_references, the non-NULL
 * slots whose value lies in [nursery_start, nursery_next). The slot is not
 * modified.
 */
1115 static mword new_obj_references = 0;
1116 static mword obj_references_checked = 0;
1119 #define HANDLE_PTR(ptr,obj) do { \
1120 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1121 new_obj_references++; \
1122 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1124 obj_references_checked++; \
1129 * ######################################################################
1130 * ######## Detecting and removing garbage.
1131 * ######################################################################
1132 * This section of code deals with detecting the objects no longer in use
1133 * and reclaiming the memory.
/*
 * scan_area:
 * Walk the objects laid out back to back between @start and @end. NULL words
 * are stepped over one pointer at a time. For every object the vtable is
 * loaded and the code dispatches on the descriptor type: each branch computes
 * the ALLOC_ALIGN-rounded object size in skip_size and runs the matching
 * OBJ_*_FOREACH_PTR macro, which expands the HANDLE_PTR in effect at this
 * point. The two reference counters are reset before the walk.
 * NOTE(review): the lines that read `type`/`desc`, bump the type_* counters
 * and advance `start` by skip_size are elided from this listing.
 */
1136 static void __attribute__((noinline))
1137 scan_area (char *start, char *end)
1142 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1144 new_obj_references = 0;
1145 obj_references_checked = 0;
1146 while (start < end) {
1147 if (!*(void**)start) {
1148 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1151 vt = (GCVTable*)LOAD_VTABLE (start);
1152 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1154 MonoObject *obj = (MonoObject*)start;
1155 g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1159 if (type == DESC_TYPE_STRING) {
1160 STRING_SIZE (skip_size, start);
1164 } else if (type == DESC_TYPE_RUN_LENGTH) {
1165 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1166 g_assert (skip_size);
1167 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1171 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1172 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1173 skip_size *= mono_array_length ((MonoArray*)start);
1174 skip_size += sizeof (MonoArray);
1175 skip_size += (ALLOC_ALIGN - 1);
1176 skip_size &= ~(ALLOC_ALIGN - 1);
1177 OBJ_VECTOR_FOREACH_PTR (vt, start);
1178 if (type == DESC_TYPE_ARRAY) {
1179 /* account for the bounds */
1184 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1185 OBJ_BITMAP_SIZE (skip_size, desc, start);
1186 g_assert (skip_size);
1187 OBJ_BITMAP_FOREACH_PTR (desc,start);
1191 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1192 skip_size = safe_object_get_size ((MonoObject*)start);
1193 skip_size += (ALLOC_ALIGN - 1);
1194 skip_size &= ~(ALLOC_ALIGN - 1);
1195 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1199 } else if (type == DESC_TYPE_COMPLEX) {
1200 /* this is a complex object */
1201 skip_size = safe_object_get_size ((MonoObject*)start);
1202 skip_size += (ALLOC_ALIGN - 1);
1203 skip_size &= ~(ALLOC_ALIGN - 1);
1204 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1208 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1209 /* this is an array of complex structs */
1210 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1211 skip_size *= mono_array_length ((MonoArray*)start);
1212 skip_size += sizeof (MonoArray);
1213 skip_size += (ALLOC_ALIGN - 1);
1214 skip_size &= ~(ALLOC_ALIGN - 1);
1215 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1216 if (type == DESC_TYPE_ARRAY) {
1217 /* account for the bounds */
1226 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1227 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1228 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
/*
 * scan_area_for_domain:
 * Walk the objects between @start and @end the same way scan_area () does,
 * but instead of visiting references decide per object whether it has to go:
 * an object whose domain is @domain and whose class is not the thread class
 * is reported at debug level 1. Each descriptor-type branch computes the
 * ALLOC_ALIGN-rounded size in skip_size and, when `remove` is set, zeroes the
 * whole object with memset ().
 * NOTE(review): the lines that set `remove`, read `type`/`desc` and advance
 * `start` are elided from this listing.
 */
1231 static void __attribute__((noinline))
1232 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1239 while (start < end) {
1240 if (!*(void**)start) {
1241 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1244 vt = (GCVTable*)LOAD_VTABLE (start);
1245 /* handle threads someway (maybe insert the root domain vtable?) */
1246 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1247 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1254 if (type == DESC_TYPE_STRING) {
1255 STRING_SIZE (skip_size, start);
1256 if (remove) memset (start, 0, skip_size);
1259 } else if (type == DESC_TYPE_RUN_LENGTH) {
1260 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1261 g_assert (skip_size);
1262 if (remove) memset (start, 0, skip_size);
1265 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1266 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1267 skip_size *= mono_array_length ((MonoArray*)start);
1268 skip_size += sizeof (MonoArray);
1269 skip_size += (ALLOC_ALIGN - 1);
1270 skip_size &= ~(ALLOC_ALIGN - 1);
1271 if (type == DESC_TYPE_ARRAY) {
1272 /* account for the bounds */
1274 if (remove) memset (start, 0, skip_size);
1277 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1278 OBJ_BITMAP_SIZE (skip_size, desc, start);
1279 g_assert (skip_size);
1280 if (remove) memset (start, 0, skip_size);
1283 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1284 skip_size = safe_object_get_size ((MonoObject*)start);
1285 skip_size += (ALLOC_ALIGN - 1);
1286 skip_size &= ~(ALLOC_ALIGN - 1);
1287 if (remove) memset (start, 0, skip_size);
1290 } else if (type == DESC_TYPE_COMPLEX) {
1291 /* this is a complex object */
1292 skip_size = safe_object_get_size ((MonoObject*)start);
1293 skip_size += (ALLOC_ALIGN - 1);
1294 skip_size &= ~(ALLOC_ALIGN - 1);
1295 if (remove) memset (start, 0, skip_size);
1298 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1299 /* this is an array of complex structs */
1300 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1301 skip_size *= mono_array_length ((MonoArray*)start);
1302 skip_size += sizeof (MonoArray);
1303 skip_size += (ALLOC_ALIGN - 1);
1304 skip_size &= ~(ALLOC_ALIGN - 1);
1305 if (type == DESC_TYPE_ARRAY) {
1306 /* account for the bounds */
1308 if (remove) memset (start, 0, skip_size);
1318 * When appdomains are unloaded we can easily remove objects that have finalizers,
1319 * but all the others could still be present in random places on the heap.
1320 * We need a sweep to get rid of them even though it's going to be costly
1322 * The reason we need to remove them is because we access the vtable and class
1323 * structures to know the object size and the reference bitmap: once the domain is
1324 * unloaded they point to random memory.
/*
 * mono_gc_clear_domain:
 * Runs scan_area_for_domain () for @domain over the [data, end_data) range of
 * every section in section_list. Big and fixed objects are not handled yet
 * (see the FIXME below).
 */
1327 mono_gc_clear_domain (MonoDomain * domain)
1329 GCMemSection *section;
1331 for (section = section_list; section; section = section->next) {
1332 scan_area_for_domain (domain, section->data, section->end_data);
1334 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
1340 * add_to_global_remset:
1342 * The global remset contains locations which point into newspace after
1343 * a minor collection. This can happen if the objects they point to are pinned.
/*
 * add_to_global_remset:
 * Append @ptr to the global_remset buffer. While `store_next + 3` stays below
 * end_set the entry goes into the current buffer; otherwise a new remset with
 * the same capacity is obtained from alloc_remset (), the old one is chained
 * behind it, and the entry is stored there. Two encodings are visible: the
 * bare pointer, or the pointer tagged with REMSET_OTHER followed by a
 * REMSET_ROOT_LOCATION word. At debug level 4 the total size of all chained
 * remsets is reported.
 * NOTE(review): the conditionals choosing between the two encodings are
 * elided from this listing; presumably @root selects the tagged form.
 */
1346 add_to_global_remset (gpointer ptr, gboolean root)
1350 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1353 * FIXME: If an object remains pinned, we need to add it at every minor collection.
1354 * To avoid uncontrolled growth of the global remset, only add each pointer once.
1356 if (global_remset->store_next + 3 < global_remset->end_set) {
1358 *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER;
1359 *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION;
1361 *(global_remset->store_next++) = (mword)ptr;
1365 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1366 rs->next = global_remset;
1369 *(global_remset->store_next++) = (mword)ptr | REMSET_OTHER;
1370 *(global_remset->store_next++) = (mword)REMSET_ROOT_LOCATION;
1372 *(global_remset->store_next++) = (mword)ptr;
1376 int global_rs_size = 0;
1378 for (rs = global_remset; rs; rs = rs->next) {
1379 global_rs_size += rs->store_next - rs->data;
1381 DEBUG (4, fprintf (gc_debug_file, "Global remset now has size %d\n", global_rs_size));
1386 * This is how the copying happens from the nursery to the old generation.
1387 * We assume that at this time all the pinned objects have been identified and
1389 * We run scan_object() for each pinned object so that the objects it
1390 * references are copied where possible. The new gray objects created can have
1391 * scan_object() run on them right away, too.
1392 * Then we run copy_object() for the precisely tracked roots. At this point
1393 * all the roots are either gray or black. We run scan_object() on the gray
1394 * objects until no more gray objects are created.
1395 * At the end of the process we walk again the pinned list and we unmark
1396 * the pinned flag. As we go we also create the list of free space for use
1397 * in the next allocation runs.
1399 * We need to remember objects from the old generation that point to the new one
1400 * (or just addresses?).
1402 * copy_object could be made into a macro once debugged (use inline for now).
/*
 * copy_object:
 * Only objects inside [from_space_start, from_space_end) and outside
 * [to_space, to_space_end) are considered. Already forwarded objects and
 * pinned objects are recognised first. Objects of MAX_SMALL_OBJ_SIZE bytes or
 * more, and objects that come from the pinned allocator, are marked instead
 * of moved (see the debug message). Everything else is copied to
 * gray_objects: word by word through a computed goto when it is at most
 * eight pointers long, otherwise with the inline assembly or memcpy () paths.
 * After the copy the bounds pointer of arrays is rebased onto the copy, the
 * forwarding pointer is installed, a scan_starts entry is recorded and
 * gray_objects is advanced by the aligned size.
 * NOTE(review): the return statements and the preprocessor conditionals
 * around the copy strategies are elided from this listing. The scan_starts
 * line indexes with `obj`; a line is missing just before it that may rebind
 * obj to the copy -- confirm.
 */
1405 static char* __attribute__((noinline))
1406 copy_object (char *obj, char *from_space_start, char *from_space_end)
1408 static void *copy_labels [] = { &&LAB_0, &&LAB_1, &&LAB_2, &&LAB_3, &&LAB_4, &&LAB_5, &&LAB_6, &&LAB_7, &&LAB_8 };
1411 * FIXME: The second set of checks is only needed if we are called for tospace
1414 if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) {
1418 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1419 if ((forwarded = object_is_forwarded (obj))) {
1420 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1421 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1424 if (object_is_pinned (obj)) {
1425 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1426 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1429 objsize = safe_object_get_size ((MonoObject*)obj);
1430 objsize += ALLOC_ALIGN - 1;
1431 objsize &= ~(ALLOC_ALIGN - 1);
1432 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1433 /* FIXME: handle pinned allocs:
1434 * Large objects are simple, at least until we always follow the rule:
1435 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1436 * At the end of major collections, we walk the los list and if
1437 * the object is pinned, it is marked, otherwise it can be freed.
1439 if (G_UNLIKELY (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj)))) {
1440 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize));
1444 /* ok, the object is not pinned, we can move it */
1445 /* use a optimized memcpy here */
1446 if (objsize <= sizeof (gpointer) * 8) {
1447 mword *dest = (mword*)gray_objects;
1448 goto *copy_labels [objsize / sizeof (gpointer)];
1450 (dest) [7] = ((mword*)obj) [7];
1452 (dest) [6] = ((mword*)obj) [6];
1454 (dest) [5] = ((mword*)obj) [5];
1456 (dest) [4] = ((mword*)obj) [4];
1458 (dest) [3] = ((mword*)obj) [3];
1460 (dest) [2] = ((mword*)obj) [2];
1462 (dest) [1] = ((mword*)obj) [1];
1464 (dest) [0] = ((mword*)obj) [0];
1472 char* edi = gray_objects;
1473 __asm__ __volatile__(
1475 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1476 : "0" (objsize/4), "1" (edi),"2" (esi)
1481 memcpy (gray_objects, obj, objsize);
1484 /* adjust array->bounds */
1485 vt = ((MonoObject*)obj)->vtable;
1486 g_assert (vt->gc_descr);
1487 if (G_UNLIKELY (vt->rank && ((MonoArray*)obj)->bounds)) {
1488 MonoArray *array = (MonoArray*)gray_objects;
1489 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1490 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1492 /* set the forwarding pointer */
1493 forward_object (obj, gray_objects);
1495 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1496 gray_objects += objsize;
1497 DEBUG (8, g_assert (gray_objects <= to_space_end));
/*
 * Copying HANDLE_PTR for the scanning functions that follow: the slot is
 * overwritten with whatever copy_object () returns for its old value. When
 * the new value still lies in [from_start, from_end) and the slot itself is
 * not in the nursery, the slot is recorded with add_to_global_remset ().
 * from_start and from_end must be in scope where the macro is expanded.
 */
1504 #define HANDLE_PTR(ptr,obj) do { \
1505 void *__old = *(ptr); \
1507 *(ptr) = copy_object (__old, from_start, from_end); \
1508 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1509 if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) && !ptr_in_nursery (ptr)) \
1510 add_to_global_remset ((ptr), FALSE); \
1515 * Scan the object pointed to by @start for references to
1516 * other objects between @from_start and @from_end and copy
1517 * them to the gray_objects area.
1518 * Returns a pointer to the end of the object.
/*
 * scan_object:
 * Dispatches on the low three bits of the descriptor. Every case runs the
 * matching OBJ_*_FOREACH_PTR macro over the object at @start (expanding the
 * HANDLE_PTR in effect here) and returns @start plus the object size; except
 * for the run-length and small-bitmap cases, which read the size from the
 * descriptor, the size is rounded up to ALLOC_ALIGN explicitly. Strings have
 * no references and are only measured. Any other type tag hits
 * g_assert_not_reached ().
 * NOTE(review): the assignment of `desc` and the preprocessor conditionals
 * that choose between the two size computations in the array cases are
 * elided from this listing.
 */
1521 scan_object (char *start, char* from_start, char* from_end)
1527 vt = (GCVTable*)LOAD_VTABLE (start);
1528 //type = vt->desc & 0x7;
1530 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1532 switch (desc & 0x7) {
1533 //if (type == DESC_TYPE_STRING) {
1534 case DESC_TYPE_STRING:
1535 STRING_SIZE (skip_size, start);
1536 return start + skip_size;
1537 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1538 case DESC_TYPE_RUN_LENGTH:
1539 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1540 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1541 g_assert (skip_size);
1542 return start + skip_size;
1543 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1544 case DESC_TYPE_ARRAY:
1545 case DESC_TYPE_VECTOR:
1546 OBJ_VECTOR_FOREACH_PTR (vt, start);
1547 skip_size = safe_object_get_size ((MonoObject*)start);
1549 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1550 skip_size *= mono_array_length ((MonoArray*)start);
1551 skip_size += sizeof (MonoArray);
1553 skip_size += (ALLOC_ALIGN - 1);
1554 skip_size &= ~(ALLOC_ALIGN - 1);
1555 return start + skip_size;
1556 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1557 case DESC_TYPE_SMALL_BITMAP:
1558 OBJ_BITMAP_FOREACH_PTR (desc,start);
1559 OBJ_BITMAP_SIZE (skip_size, desc, start);
1560 return start + skip_size;
1561 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1562 case DESC_TYPE_LARGE_BITMAP:
1563 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1564 skip_size = safe_object_get_size ((MonoObject*)start);
1565 skip_size += (ALLOC_ALIGN - 1);
1566 skip_size &= ~(ALLOC_ALIGN - 1);
1567 return start + skip_size;
1568 //} else if (type == DESC_TYPE_COMPLEX) {
1569 case DESC_TYPE_COMPLEX:
1570 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1571 /* this is a complex object */
1572 skip_size = safe_object_get_size ((MonoObject*)start);
1573 skip_size += (ALLOC_ALIGN - 1);
1574 skip_size &= ~(ALLOC_ALIGN - 1);
1575 return start + skip_size;
1576 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1577 case DESC_TYPE_COMPLEX_ARR:
1578 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1579 /* this is an array of complex structs */
1580 skip_size = safe_object_get_size ((MonoObject*)start);
1582 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1583 skip_size *= mono_array_length ((MonoArray*)start);
1584 skip_size += sizeof (MonoArray);
1586 skip_size += (ALLOC_ALIGN - 1);
1587 skip_size &= ~(ALLOC_ALIGN - 1);
1588 return start + skip_size;
1590 g_assert_not_reached ();
1597 * Scan objects in the gray stack until the stack is empty. This should be called
1598 * frequently after each object is copied, to achieve better locality and cache
/*
 * drain_gray_stack:
 * Scans objects from gray_first up to gray_objects with scan_object (),
 * passing @start_addr/@end_addr as the from-space range. The loop re-reads
 * gray_objects on every iteration, so objects appended while scanning are
 * processed too. On exit gray_first is moved to where scanning stopped.
 */
1602 drain_gray_stack (char *start_addr, char *end_addr)
1604 char *gray_start = gray_first;
1606 while (gray_start < gray_objects) {
1607 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1608 gray_start = scan_object (gray_start, start_addr, end_addr);
1611 gray_first = gray_start;
1617 * Scan the valuetype pointed to by START, described by DESC for references to
1618 * other objects between @from_start and @from_end and copy them to the gray_objects area.
1619 * Returns a pointer to the end of the object.
/*
 * scan_vtype:
 * @start points at the value type's data; because descriptors also account
 * for the MonoObject header, @start is moved back by sizeof (MonoObject)
 * before the descriptor is applied. Run-length and small-bitmap descriptors
 * are handled like in scan_object () and return the adjusted @start plus the
 * size stored in @desc. Large-bitmap and complex descriptors, and every other
 * type tag, end in g_assert_not_reached ().
 */
1622 scan_vtype (char *start, mword desc, char* from_start, char* from_end)
1626 /* The descriptors include info about the MonoObject header as well */
1627 start -= sizeof (MonoObject);
1629 switch (desc & 0x7) {
1630 case DESC_TYPE_RUN_LENGTH:
1631 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1632 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1633 g_assert (skip_size);
1634 return start + skip_size;
1635 case DESC_TYPE_SMALL_BITMAP:
1636 OBJ_BITMAP_FOREACH_PTR (desc,start);
1637 OBJ_BITMAP_SIZE (skip_size, desc, start);
1638 return start + skip_size;
1639 case DESC_TYPE_LARGE_BITMAP:
1640 case DESC_TYPE_COMPLEX:
1642 g_assert_not_reached ();
1645 // The other descriptors can't happen with vtypes
1646 g_assert_not_reached ();
1653 * Addresses from start to end are already sorted. This function finds the object header
1654 * for each address and pins the object. The addresses must be inside the passed section.
1655 * Return the number of pinned objects.
/*
 * pin_objects_from_addresses:
 * For each candidate address in [start, end) that differs from the previous
 * one and lies in [start_nursery, end_nursery): addresses inside the object
 * examined last are recognised as duplicates; otherwise a starting point is
 * looked up in section->scan_starts (falling back to start_nursery, and never
 * before the end of the last object) and objects are walked forward from
 * there, stepping over NULL words, until the walk passes the address. An
 * object that contains the address is pinned with pin_object () and stored
 * in the caller's array at definitely_pinned [count], i.e. the input array is
 * overwritten in place with the pinned objects.
 * NOTE(review): the lines that fetch `addr`, update `last` and `count`, and
 * return are elided from this listing.
 */
1658 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1663 void *last_obj = NULL;
1664 size_t last_obj_size = 0;
1667 void **definitely_pinned = start;
1668 while (start < end) {
1670 /* the range check should be redundant */
1671 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1672 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1673 /* multiple pointers to the same object */
1674 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
1678 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1679 search_start = (void*)section->scan_starts [idx];
1680 if (!search_start || search_start > addr) {
1683 search_start = section->scan_starts [idx];
1684 if (search_start && search_start <= addr)
1687 if (!search_start || search_start > addr)
1688 search_start = start_nursery;
1690 if (search_start < last_obj)
1691 search_start = (char*)last_obj + last_obj_size;
1692 /* now addr should be in an object a short distance from search_start
1693 * Note that search_start must point to zeroed mem or point to an object.
1696 if (!*(void**)search_start) {
1697 mword p = (mword)search_start;
1698 p += sizeof (gpointer);
1699 p += ALLOC_ALIGN - 1;
1700 p &= ~(ALLOC_ALIGN - 1);
1701 search_start = (void*)p;
1704 last_obj = search_start;
1705 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1706 last_obj_size += ALLOC_ALIGN - 1;
1707 last_obj_size &= ~(ALLOC_ALIGN - 1);
1708 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size));
1709 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1710 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1711 pin_object (search_start);
1712 definitely_pinned [count] = search_start;
1716 /* skip to the next object */
1717 search_start = (void*)((char*)search_start + last_obj_size);
1718 } while (search_start <= addr);
1719 /* we either pinned the correct object or we ignored the addr because
1720 * it points to unused zeroed memory.
1726 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1730 static void** pin_queue;
1731 static int pin_queue_size = 0;
1732 static int next_pin_slot = 0;
1737 gap = (gap * 10) / 13;
1738 if (gap == 9 || gap == 10)
/*
 * compare_addr:
 * qsort ()-style comparator for an array of addresses: @a and @b point at two
 * array slots, each holding a pointer value.
 *
 * Returns a negative, zero or positive value when the address stored in @a is
 * lower than, equal to or higher than the one stored in @b. The result is
 * built from comparisons instead of a pointer subtraction: a difference does
 * not fit in an int on 64-bit targets (giving the wrong sign, or 0 for
 * distinct addresses) and arithmetic on void pointers is a compiler extension.
 */
static int
compare_addr (const void *a, const void *b)
{
	const char *lhs = *(void *const *)a;
	const char *rhs = *(void *const *)b;

	return (lhs > rhs) - (lhs < rhs);
}
/*
 * sort_addresses:
 * Sorts @array (@size pointers) in place without calling qsort (). The
 * visible lines shrink a gap with new_gap () on every pass, swap out-of-order
 * pairs array [i] / array [j], and stop once a pass with gap 1 made no swap,
 * which looks like a comb sort.
 * NOTE(review): the loop headers and the computation of `j` and `end` are
 * elided from this listing.
 */
1753 /* sort the addresses in array in increasing order */
1755 sort_addresses (void **array, int size)
1758 * qsort is slower as predicted.
1759 * qsort (array, size, sizeof (gpointer), compare_addr);
1766 gap = new_gap (gap);
1769 for (i = 0; i < end; i++) {
1771 if (array [i] > array [j]) {
1772 void* val = array [i];
1773 array [i] = array [j];
1778 if (gap == 1 && !swapped)
/*
 * print_nursery_gaps:
 * Debug helper: writes one "Nursery range" line to gc_debug_file for each of
 * the first next_pin_slot entries of pin_queue, plus one more line after the
 * loop. Each line shows `first`, `next` and their distance in bytes.
 * NOTE(review): the lines that update `first` and set `next` for the final
 * range are elided from this listing.
 */
1783 static G_GNUC_UNUSED void
1784 print_nursery_gaps (void* start_nursery, void *end_nursery)
1787 gpointer first = start_nursery;
1789 for (i = 0; i < next_pin_slot; ++i) {
1790 next = pin_queue [i];
1791 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1795 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1798 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
1800 optimize_pin_queue (int start_slot)
1802 void **start, **cur, **end;
1803 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1804 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1805 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1806 if ((next_pin_slot - start_slot) > 1)
1807 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1808 start = cur = pin_queue + start_slot;
1809 end = pin_queue + next_pin_slot;
1812 while (*start == *cur && cur < end)
1816 next_pin_slot = start - pin_queue;
1817 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1818 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
1823 realloc_pin_queue (void)
1825 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1826 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1827 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1828 free_internal_mem (pin_queue);
1829 pin_queue = new_pin;
1830 pin_queue_size = new_size;
1831 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1835 * Scan the memory between start and end and queue values which could be pointers
1836 * to the area between start_nursery and end_nursery for later consideration.
1837 * Typically used for thread stacks.
/*
 * conservatively_pin_objects_from:
 * Treats every word in [start, end) as a potential pointer. Words whose value
 * falls in [start_nursery, end_nursery) are rounded down to ALLOC_ALIGN and
 * appended to pin_queue, which is grown with realloc_pin_queue () when full.
 * Nothing is pinned here; the queue is only filled for later processing.
 * NOTE(review): the lines that advance `start` and maintain `count` are
 * elided from this listing.
 */
1840 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1843 while (start < end) {
1844 if (*start >= start_nursery && *start < end_nursery) {
1846 * *start can point to the middle of an object
1847 * note: should we handle pointing at the end of an object?
1848 * pinning in C# code disallows pointing at the end of an object
1849 * but there is some small chance that an optimizing C compiler
1850 * may keep the only reference to an object by pointing
1851 * at the end of it. We ignore this small chance for now.
1852 * Pointers to the end of an object are indistinguishable
1853 * from pointers to the start of the next object in memory
1854 * so if we allow that we'd need to pin two objects...
1855 * We queue the pointer in an array, the
1856 * array will then be sorted and uniqued. This way
1857 * we can coalesce several pinning pointers and it should
1858 * be faster since we'd do a memory scan with increasing
1859 * addresses. Note: we can align the address to the allocation
1860 * alignment, so the unique process is more effective.
1862 mword addr = (mword)*start;
1863 addr &= ~(ALLOC_ALIGN - 1);
1864 if (next_pin_slot >= pin_queue_size)
1865 realloc_pin_queue ();
1866 pin_queue [next_pin_slot++] = (void*)addr;
1867 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1872 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1874 #ifdef HAVE_VALGRIND_MEMCHECK_H
1876 * The pinning addresses might come from undefined memory, this is normal. Since they
1877 * are used in lots of functions, we make the memory defined here instead of having
1878 * to add a supression for those functions.
1880 VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0]));
1885 * If generation is 0, just mark objects in the nursery, the others we don't care,
1886 * since they are not going to move anyway.
1887 * There are different areas that are scanned for pinned pointers:
1888 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1889 * *) the pinned handle table
1890 * *) the pinned roots
1892 * Note: when we'll use a write barrier for old to new gen references, we need to
1893 * keep track of old gen objects that point to pinned new gen objects because in that
1894 * case the referenced object will be moved maybe at the next collection, but there
1895 * is no write in the old generation area where the pinned object is referenced
1896 * and we may not consider it as reachable.
/*
 * Generation-aware pinning entry point taking the generation to mark.
 * No statement of its body is visible in this excerpt, and the function is
 * tagged G_GNUC_UNUSED.
 */
1898 static G_GNUC_UNUSED void
1899 mark_pinned_objects (int generation)
1904 * Debugging function: find in the conservative roots where @obj is being pinned.
/*
 * Debugging aid: report the conservative slots that refer to the object
 * occupying [obj, obj + size).
 * Walks every bucket of roots_hash [0], considers only the records without a
 * root descriptor (a descriptor means precise info), and logs each word
 * whose value lies inside the object. Thread data is then searched through
 * find_pinning_ref_from_thread ().
 * NOTE(review): the table is indexed with a literal 0 here while
 * pin_from_roots () walks ROOT_TYPE_PINNED -- confirm which root type index 0
 * denotes.
 */
1906 static G_GNUC_UNUSED void
1907 find_pinning_reference (char *obj, size_t size)
1911 char *endobj = obj + size;
1912 for (i = 0; i < roots_hash_size [0]; ++i) {
1913 for (root = roots_hash [0][i]; root; root = root->next) {
1914 /* if desc is non-null it has precise info */
1915 if (!root->root_desc) {
1916 char ** start = (char**)root->start_root;
1917 while (start < (char**)root->end_root) {
1918 if (*start >= obj && *start < endobj) {
1919 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1926 find_pinning_ref_from_thread (obj, size);
1930 * The first thing we do in a collection is to identify pinned objects.
1931 * This function considers all the areas of memory that need to be
1932 * conservatively scanned.
/*
 * Queue pinning candidates for the range [start_nursery, end_nursery).
 * Every record in the ROOT_TYPE_PINNED table is scanned with
 * conservatively_pin_objects_from (), then pin_thread_data () is called with
 * the same range to cover the threads.
 * Despite the parameter names, major_collection () also calls this with the
 * bounds of other sections, large objects and pinned chunks.
 */
1935 pin_from_roots (void *start_nursery, void *end_nursery)
1939 DEBUG (2, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d/%d entries)\n", (int)roots_size, num_roots_entries [ROOT_TYPE_NORMAL], num_roots_entries [ROOT_TYPE_PINNED]));
1940 /* objects pinned from the API are inside these roots */
1941 for (i = 0; i < roots_hash_size [ROOT_TYPE_PINNED]; ++i) {
1942 for (root = roots_hash [ROOT_TYPE_PINNED][i]; root; root = root->next) {
1943 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1944 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1947 /* now deal with the thread stacks
1948 * in the future we should be able to conservatively scan only:
1949 * *) the cpu registers
1950 * *) the unmanaged stack frames
1951 * *) the _last_ managed stack frame
1952 * *) pointers slots in managed frames
1954 pin_thread_data (start_nursery, end_nursery);
1957 /* Copy function called from user defined mark functions */
/*
 * Range forwarded to copy_object () by user_copy (). Both are assigned in
 * precisely_scan_objects_from () right before a user marker is invoked.
 */
1958 static char *user_copy_n_start;
1959 static char *user_copy_n_end;
/*
 * Callback handed to user-defined mark functions: copies @addr through
 * copy_object () using the range saved in user_copy_n_start and
 * user_copy_n_end, and returns the value copy_object () produced.
 */
1962 user_copy (void *addr)
1965 return copy_object (addr, user_copy_n_start, user_copy_n_end);
1971 * The memory area from start_root to end_root contains pointers to objects.
1972 * Their position is precisely described by @desc (this means that the pointer
1973 * can be either NULL or the pointer to the start of an object).
1974 * This function copies them to to_space and updates them.
/*
 * Precisely scan the root area starting at start_root as described by @desc,
 * overwriting each visited non-NULL slot with the value returned by
 * copy_object () for the range [n_start, n_end).
 *   ROOT_DESC_BITMAP:  desc is shifted by ROOT_DESC_TYPE_SHIFT and its low
 *                      bit selects whether the current slot is visited.
 *   ROOT_DESC_COMPLEX: the shifted desc indexes complex_descriptors; the
 *                      first word there, minus one, is the number of bitmap
 *                      words, and each bitmap word covers GC_BITS_PER_WORD
 *                      consecutive slots.
 *   ROOT_DESC_USER:    the marker found in user_descriptors is called with
 *                      start_root and user_copy as its callback, after the
 *                      range has been saved for user_copy ().
 *   ROOT_DESC_RUN_LEN: hits g_assert_not_reached (); a second
 *                      g_assert_not_reached () follows, presumably for the
 *                      default case (its label is not visible here).
 * In the bitmap and complex cases the gray stack is drained after each
 * copied root.
 */
1977 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1979 switch (desc & ROOT_DESC_TYPE_MASK) {
1980 case ROOT_DESC_BITMAP:
1981 desc >>= ROOT_DESC_TYPE_SHIFT;
1983 if ((desc & 1) && *start_root) {
1984 *start_root = copy_object (*start_root, n_start, n_end);
1985 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root));
1986 drain_gray_stack (n_start, n_end);
1992 case ROOT_DESC_COMPLEX: {
1993 gsize *bitmap_data = complex_descriptors + (desc >> ROOT_DESC_TYPE_SHIFT);
1994 int bwords = (*bitmap_data) - 1;
1995 void **start_run = start_root;
1997 while (bwords-- > 0) {
1998 gsize bmap = *bitmap_data++;
1999 void **objptr = start_run;
2001 if ((bmap & 1) && *objptr) {
2002 *objptr = copy_object (*objptr, n_start, n_end);
2003 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", objptr, *objptr));
2004 drain_gray_stack (n_start, n_end);
2009 start_run += GC_BITS_PER_WORD;
2013 case ROOT_DESC_USER: {
2014 MonoGCMarkFunc marker = user_descriptors [desc >> ROOT_DESC_TYPE_SHIFT];
2016 user_copy_n_start = n_start;
2017 user_copy_n_end = n_end;
2018 marker (start_root, user_copy);
2021 case ROOT_DESC_RUN_LEN:
2022 g_assert_not_reached ();
2024 g_assert_not_reached ();
/*
 * Obtain a Fragment record. The head of fragment_freelist is examined first
 * and unlinked for reuse; a fresh record is otherwise taken from
 * get_internal_mem (). The branch and return statements are not visible in
 * this excerpt, so the exact condition is presumably "freelist non-empty".
 */
2029 alloc_fragment (void)
2031 Fragment *frag = fragment_freelist;
2033 fragment_freelist = frag->next;
2037 frag = get_internal_mem (sizeof (Fragment));
2043 * Allocate and setup the data structures needed to be able to allocate objects
2044 * in the nursery. The nursery is stored in nursery_section.
/*
 * Set up the nursery. nursery_section is tested first (presumably an early
 * return when it is already set; the statement is not visible here).
 * Allocates the GCMemSection record and the backing memory with
 * get_os_memory (), initialises nursery_start, nursery_real_end and
 * nursery_next, links the section at the head of section_list with role
 * MEMORY_ROLE_GEN0, and prepares one fragment covering
 * [nursery_start, nursery_real_end).
 * With ALIGN_NURSERY defined, twice nursery_size is requested so that
 * nursery_start can be rounded up to a (1 << DEFAULT_NURSERY_BITS) boundary;
 * section->data still records the address returned by get_os_memory ().
 * NOTE(review): section->size and the scan_starts count are derived from
 * alloc_size while end_data is nursery_start + nursery_size -- confirm this
 * is intended in the ALIGN_NURSERY configuration.
 * NOTE(review): no allocation result is checked (see the OOM FIXME).
 */
2047 alloc_nursery (void)
2049 GCMemSection *section;
2055 if (nursery_section)
2057 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size));
2058 /* later we will alloc a larger area for the nursery but only activate
2059 * what we need. The rest will be used as expansion if we have too many pinned
2060 * objects in the existing nursery.
2062 /* FIXME: handle OOM */
2063 section = get_internal_mem (sizeof (GCMemSection));
2065 #ifdef ALIGN_NURSERY
2066 /* Allocate twice the memory to be able to put the nursery at an aligned address */
2067 g_assert (nursery_size == DEFAULT_NURSERY_SIZE);
2069 alloc_size = nursery_size * 2;
2070 data = get_os_memory (alloc_size, TRUE);
2071 nursery_start = (void*)(((mword)data + (1 << DEFAULT_NURSERY_BITS) - 1) & ~((1 << DEFAULT_NURSERY_BITS) - 1));
2072 g_assert ((char*)nursery_start + nursery_size <= ((char*)data + alloc_size));
2073 /* FIXME: Use the remaining size for something else, if it is big enough */
2075 alloc_size = nursery_size;
2076 data = get_os_memory (alloc_size, TRUE);
2077 nursery_start = data;
2079 nursery_real_end = nursery_start + nursery_size;
2080 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
2081 nursery_next = nursery_start;
2082 total_alloc += alloc_size;
2083 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc));
2084 section->data = section->next_data = data;
2085 section->size = alloc_size;
2086 section->end_data = nursery_real_end;
2087 scan_starts = alloc_size / SCAN_START_SIZE;
2088 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2089 section->num_scan_start = scan_starts;
2090 section->role = MEMORY_ROLE_GEN0;
2092 /* add to the section list */
2093 section->next = section_list;
2094 section_list = section;
2096 nursery_section = section;
2098 /* Setup the single first large fragment */
2099 frag = alloc_fragment ();
2100 frag->fragment_start = nursery_start;
2101 frag->fragment_limit = nursery_start;
2102 frag->fragment_end = nursery_real_end;
2103 nursery_frag_real_end = nursery_real_end;
2104 /* FIXME: frag here is lost */
/*
 * Walk a FinalizeEntry list and replace each entry's object with the result
 * of copy_object () for the range [start, end).
 * The lines between the loop header and the trace are not visible in this
 * excerpt, so entries may be filtered before being copied.
 */
2108 scan_finalizer_entries (FinalizeEntry *list, char *start, char *end) {
2111 for (fin = list; fin; fin = fin->next) {
2114 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2115 fin->object = copy_object (fin->object, start, end);
2120 * Update roots in the old generation. Since we currently don't have the
2121 * info from the write barriers, we just scan all the objects.
/*
 * Scan the old generation for references into [start, end).
 * Sections are walked with the nursery section singled out (presumably
 * skipped), scanning objects one after another with scan_object () up to
 * section->next_data. Every entry of los_object_list is scanned next, and
 * finally the fin_ready_list and critical_fin_list finalizer queues.
 * Tagged G_GNUC_UNUSED; the only call visible in this excerpt is the
 * commented-out one in finish_gray_stack ().
 */
2123 static G_GNUC_UNUSED void
2124 scan_old_generation (char *start, char* end)
2126 GCMemSection *section;
2127 LOSObject *big_object;
2130 for (section = section_list; section; section = section->next) {
2131 if (section == nursery_section)
2133 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
2134 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
2136 while (p < section->next_data) {
2141 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
2142 p = scan_object (p, start, end);
2145 /* scan the old object space, too */
2146 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2147 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2148 scan_object (big_object->data, start, end);
2150 /* scan the list of objects ready for finalization */
2151 scan_finalizer_entries (fin_ready_list, start, end);
2152 scan_finalizer_entries (critical_fin_list, start, end);
/*
 * Running total of the bytes linked into nursery_fragments by
 * add_nursery_frag (); printed by collect_nursery () after fragment creation.
 */
2155 static mword fragment_total = 0;
2157 * We found a fragment of free memory in the nursery: memzero it and if
2158 * it is big enough, add it to the list of fragments that can be used for
/*
 * Handle a free gap [frag_start, frag_end) of frag_size bytes found in the
 * nursery. Under the CLEAR_AT_GC policy the gap is zeroed right away.
 * A gap of at least FRAGMENT_MIN_SIZE gets a Fragment record that is pushed
 * on the head of nursery_fragments, and its size is added to fragment_total.
 * The last memset zeroes gaps that are not kept (presumably the else
 * branch, whose delimiters are not visible in this excerpt).
 * NOTE(review): if that is the else branch, a small gap is zeroed twice when
 * the policy is CLEAR_AT_GC.
 */
2162 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
2165 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
2166 /* memsetting just the first chunk start is bound to provide better cache locality */
2167 if (nursery_clear_policy == CLEAR_AT_GC)
2168 memset (frag_start, 0, frag_size);
2169 /* Not worth dealing with smaller fragments: need to tune */
2170 if (frag_size >= FRAGMENT_MIN_SIZE) {
2171 fragment = alloc_fragment ();
2172 fragment->fragment_start = frag_start;
2173 fragment->fragment_limit = frag_start;
2174 fragment->fragment_end = frag_end;
2175 fragment->next = nursery_fragments;
2176 nursery_fragments = fragment;
2177 fragment_total += frag_size;
2179 /* Clear unused fragments, pinning depends on this */
2180 memset (frag_start, 0, frag_size);
/*
 * Scan every entry of los_object_list that is pinned and not yet flagged as
 * scanned, setting the flag afterwards so a later pass skips it.
 * finish_gray_stack () stores the return value in bigo_scanned_num and keeps
 * looping while it is non-zero; the counting and return statements are not
 * visible in this excerpt.
 */
2185 scan_needed_big_objects (char *start_addr, char *end_addr)
2187 LOSObject *big_object;
2189 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2190 if (!big_object->scanned && object_is_pinned (big_object->data)) {
2191 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2192 scan_object (big_object->data, start_addr, end_addr);
2193 big_object->scanned = TRUE;
/*
 * Complete the transitive copy for the range [start_addr, end_addr).
 *  1. Scan the gray area from gray_first until gray_start catches up with
 *     gray_objects.
 *  2. Repeat: call finalize_in_range (), scan newly marked large objects
 *     with scan_needed_big_objects (), and drain the gray area again, until
 *     a pass neither changes num_ready_finalizers nor scans a large object.
 *  3. Record the end of the copied data in to_space and
 *     to_space_section->next_data.
 *  4. Clear disappearing links with null_link_in_range ().
 */
2201 finish_gray_stack (char *start_addr, char *end_addr)
2205 int fin_ready, bigo_scanned_num;
2209 * We copied all the reachable objects. Now it's the time to copy
2210 * the objects that were not referenced by the roots, but by the copied objects.
2211 * we built a stack of objects pointed to by gray_start: they are
2212 * additional roots and we may add more items as we go.
2213 * We loop until gray_start == gray_objects which means no more objects have
2214 * been added. Note this is iterative: no recursion is involved.
2215 * We need to walk the LO list as well in search of marked big objects
2216 * (use a flag since this is needed only on major collections). We need to loop
2217 * here as well, so keep a counter of marked LO (increasing it in copy_object).
2218 * To achieve better cache locality and cache usage, we drain the gray stack
2219 * frequently, after each object is copied, and just finish the work here.
2221 gray_start = gray_first;
2222 while (gray_start < gray_objects) {
2223 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2224 gray_start = scan_object (gray_start, start_addr, end_addr);
2227 //scan_old_generation (start_addr, end_addr);
2228 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
2229 /* walk the finalization queue and move also the objects that need to be
2230 * finalized: use the finalized objects as new roots so the objects they depend
2231 * on are also not reclaimed. As with the roots above, only objects in the nursery
2232 * are marked/copied.
2233 * We need a loop here, since objects ready for finalizers may reference other objects
2234 * that are fin-ready. Speedup with a flag?
2237 fin_ready = num_ready_finalizers;
2238 finalize_in_range (start_addr, end_addr);
2239 bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr);
2241 /* drain the new stack that might have been created */
2242 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2243 while (gray_start < gray_objects) {
2244 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2245 gray_start = scan_object (gray_start, start_addr, end_addr);
2247 } while (fin_ready != num_ready_finalizers || bigo_scanned_num);
2249 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
2250 to_space = gray_start;
2251 to_space_section->next_data = to_space;
2254 * handle disappearing links
2255 * Note we do this after checking the finalization queue because if an object
2256 * survives (at least long enough to be finalized) we don't clear the link.
2257 * This also deals with a possible issue with the monitor reclamation: with the Boehm
2258 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
2261 null_link_in_range (start_addr, end_addr);
2263 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/*
 * Pin queue length saved by collect_nursery () at the end of a collection;
 * read by the diagnostics in minor_collect_or_expand_inner ().
 */
2266 static int last_num_pinned = 0;
/*
 * Rebuild the nursery free list from pin_queue [start_pin, end_pin); the gap
 * arithmetic assumes the entries are in ascending address order.
 * The previous nursery_fragments list is moved to fragment_freelist for
 * reuse and the nursery scan starts are cleared. For every pinned object:
 * the pin bit is removed, a scan start is recorded, the gap in front of it
 * is handed to add_nursery_frag (), and frag_start is advanced past the
 * object (size rounded up to ALLOC_ALIGN). For an object that may be half
 * constructed, frag_start is moved forward over zero words up to the TLAB
 * end returned by find_tlab_next_from_address ().
 * The tail up to nursery_real_end forms the last gap and
 * nursery_last_pinned_end records where it starts. When no fragment
 * results, the pinned objects are logged. nursery_next and
 * nursery_frag_real_end are set to NULL near the end.
 */
2269 build_nursery_fragments (int start_pin, int end_pin)
2271 char *frag_start, *frag_end;
2275 /* FIXME: handle non-NULL fragment_freelist */
2276 fragment_freelist = nursery_fragments;
2277 nursery_fragments = NULL;
2278 frag_start = nursery_start;
2280 /* clear scan starts */
2281 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
2282 for (i = start_pin; i < end_pin; ++i) {
2283 frag_end = pin_queue [i];
2284 /* remove the pin bit from pinned objects */
2285 unpin_object (frag_end);
2286 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
2287 frag_size = frag_end - frag_start;
2289 add_nursery_frag (frag_size, frag_start, frag_end);
2290 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2291 frag_size += ALLOC_ALIGN - 1;
2292 frag_size &= ~(ALLOC_ALIGN - 1);
2293 frag_start = (char*)pin_queue [i] + frag_size;
2295 * pin_queue [i] might point to a half-constructed string or vector whose
2296 * length field is not set. In that case, frag_start points inside the
2297 * (zero initialized) object. Find the end of the object by scanning forward.
2300 if (is_maybe_half_constructed (pin_queue [i])) {
2303 /* This is also hit for zero length arrays/strings */
2305 /* Find the end of the TLAB which contained this allocation */
2306 tlab_end = find_tlab_next_from_address (pin_queue [i]);
2309 while ((frag_start < tlab_end) && *(mword*)frag_start == 0)
2310 frag_start += sizeof (mword);
2313 * FIXME: The object is either not allocated in a TLAB, or it isn't a
2314 * half constructed object.
2319 nursery_last_pinned_end = frag_start;
2320 frag_end = nursery_real_end;
2321 frag_size = frag_end - frag_start;
2323 add_nursery_frag (frag_size, frag_start, frag_end);
2324 if (!nursery_fragments) {
2325 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
2326 for (i = start_pin; i < end_pin; ++i) {
2327 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2332 nursery_next = nursery_frag_real_end = NULL;
2334 /* Clear TLABs for all threads */
2338 /* FIXME: later reduce code duplication here with the above
2339 * We don't keep track of section fragments for non-nursery sections yet, so
/*
 * Per-section counterpart of build_nursery_fragments ().
 * Clears the section scan starts and resets next_data to the section start,
 * then for each entry of
 * pin_queue [section->pin_queue_start, section->pin_queue_end): removes the
 * pin bit, records a scan start, zeroes the gap in front of the object and
 * raises section->next_data to the end of the object (size rounded up to
 * ALLOC_ALIGN). The tail up to section->end_data is zeroed last.
 * No fragment list is built for the section.
 * NOTE(review): unpin_object () runs before frag_end is compared with the
 * section end -- confirm an out-of-section entry cannot reach this point.
 */
2343 build_section_fragments (GCMemSection *section)
2346 char *frag_start, *frag_end;
2349 /* clear scan starts */
2350 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
2351 frag_start = section->data;
2352 section->next_data = section->data;
2353 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
2354 frag_end = pin_queue [i];
2355 /* remove the pin bit from pinned objects */
2356 unpin_object (frag_end);
2357 if (frag_end >= section->data + section->size) {
2358 frag_end = section->data + section->size;
2360 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
2362 frag_size = frag_end - frag_start;
2364 memset (frag_start, 0, frag_size);
2365 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2366 frag_size += ALLOC_ALIGN - 1;
2367 frag_size &= ~(ALLOC_ALIGN - 1);
2368 frag_start = (char*)pin_queue [i] + frag_size;
2369 section->next_data = MAX (section->next_data, frag_start);
2371 frag_end = section->end_data;
2372 frag_size = frag_end - frag_start;
2374 memset (frag_start, 0, frag_size);
/*
 * Precisely scan every root record registered under @root_type, passing its
 * bounds and descriptor to precisely_scan_objects_from () together with the
 * range [addr_start, addr_end).
 */
2378 scan_from_registered_roots (char *addr_start, char *addr_end, int root_type)
2382 for (i = 0; i < roots_hash_size [root_type]; ++i) {
2383 for (root = roots_hash [root_type][i]; root; root = root->next) {
2384 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2385 precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc);
2391 * Collect objects in the nursery.
/*
 * Run a minor (nursery) collection. A comment in the body states that the
 * world must already be stopped.
 *  - nursery_next is raised to nursery_real_end, so the whole nursery is
 *    treated as in use; the preceding MAX with nursery_last_pinned_end is
 *    therefore redundant for now (see the FIXME).
 *  - Under CLEAR_AT_TLAB_CREATION the unused tail and every fragment are
 *    zeroed, because pinning depends on free memory being clear.
 *  - to_space restarts at to_space_section->next_data; when the room left
 *    is smaller than the nursery range, a new section of four times the
 *    nursery section size is allocated and becomes to_space_section.
 *  - Pinning: pin_from_roots (), optimize_pin_queue (0) and
 *    pin_objects_from_addresses (), whose result becomes next_pin_slot.
 *  - Copying: remembered sets, the pinned objects, ROOT_TYPE_NORMAL roots
 *    and pinned-alloc objects are scanned, then finish_gray_stack () runs.
 *  - build_nursery_fragments () rebuilds the free list, mono_stats is
 *    updated, last_num_pinned is saved and mono_gc_finalize_notify () is
 *    called when either finalizer list is non-empty.
 * @requested_size is not referenced by any statement visible in this excerpt.
 */
2394 collect_nursery (size_t requested_size)
2396 GCMemSection *section;
2397 size_t max_garbage_amount;
2399 char *orig_nursery_next;
2401 TV_DECLARE (all_atv);
2402 TV_DECLARE (all_btv);
2407 orig_nursery_next = nursery_next;
2408 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2409 /* FIXME: optimize later to use the higher address where an object can be present */
2410 nursery_next = MAX (nursery_next, nursery_real_end);
2412 if (consistency_check_at_minor_collection)
2413 check_consistency ();
2415 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
2416 max_garbage_amount = nursery_next - nursery_start;
2418 /* Clear all remaining nursery fragments, pinning depends on this */
2419 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2420 g_assert (orig_nursery_next <= nursery_frag_real_end);
2421 memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next);
2422 for (frag = nursery_fragments; frag; frag = frag->next) {
2423 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2428 * not enough room in the old generation to store all the possible data from
2429 * the nursery in a single continuous space.
2430 * We reset to_space if we allocated objects in degraded mode.
2432 if (to_space_section)
2433 to_space = gray_objects = gray_first = to_space_section->next_data;
2434 if ((to_space_end - to_space) < max_garbage_amount) {
2435 section = alloc_section (nursery_section->size * 4);
2436 g_assert (nursery_section->size >= max_garbage_amount);
2437 to_space = gray_objects = gray_first = section->next_data;
2438 to_space_end = section->end_data;
2439 to_space_section = section;
2441 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2442 nursery_section->next_data = nursery_next;
2445 mono_stats.minor_gc_count ++;
2446 /* world must be stopped already */
2447 TV_GETTIME (all_atv);
2449 /* pin from pinned handles */
2450 pin_from_roots (nursery_start, nursery_next);
2451 /* identify pinned objects */
2452 optimize_pin_queue (0);
2453 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2455 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2456 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2459 * walk all the roots and copy the young objects to the old generation,
2460 * starting from to_space
2463 scan_from_remsets (nursery_start, nursery_next);
2464 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2466 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2468 /* the pinned objects are roots */
2469 for (i = 0; i < next_pin_slot; ++i) {
2470 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2471 scan_object (pin_queue [i], nursery_start, nursery_next);
2473 /* registered roots, this includes static fields */
2474 scan_from_registered_roots (nursery_start, nursery_next, ROOT_TYPE_NORMAL);
2475 /* alloc_pinned objects */
2476 scan_from_pinned_objects (nursery_start, nursery_next);
2478 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2480 finish_gray_stack (nursery_start, nursery_next);
2482 /* walk the pin_queue, build up the fragment list of free memory, unmark
2483 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2486 build_nursery_fragments (0, next_pin_slot);
2488 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2490 TV_GETTIME (all_btv);
2491 mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2493 /* prepare the pin queue for the next collection */
2494 last_num_pinned = next_pin_slot;
2496 if (fin_ready_list || critical_fin_list) {
2497 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2498 mono_gc_finalize_notify ();
/*
 * Run a major collection. The copy range is the whole address space:
 * heap_start is NULL and heap_end is (char*)-1.
 *  - Under CLEAR_AT_TLAB_CREATION the unused nursery tail and all fragments
 *    are zeroed first, because pinning depends on free memory being clear.
 *  - If the MONO_GC_NO_MAJOR environment variable is set, collect_nursery (0)
 *    is called instead (presumably followed by a return, not visible here).
 *  - Pinning is computed separately for every section, large object and
 *    pinned chunk by calling pin_from_roots () with that area's bounds.
 *    Sections keep their slice of the pin queue in pin_queue_start and
 *    pin_queue_end. Large objects and chunks are marked directly and their
 *    queue entries are discarded by restoring next_pin_slot.
 *  - A to-space section sized to the sum of (next_data - data) over all
 *    sections is allocated. Pinned objects, NORMAL and WBARRIER roots,
 *    pinned-alloc objects and both finalizer lists are then scanned, marked
 *    large objects are scanned, and finish_gray_stack () completes the copy.
 *  - Sweep: large objects that are not pinned are freed (pinned ones are
 *    unpinned and get their scanned flag reset), sweep_pinned_objects ()
 *    runs, sections without pinned objects are unlinked and freed, and the
 *    others go through build_section_fragments (). to_space_section and
 *    nursery_section are left alone by that loop; the nursery free list is
 *    rebuilt last with build_nursery_fragments ().
 *  - mono_stats is updated and mono_gc_finalize_notify () is called when
 *    either finalizer list is non-empty.
 * NOTE(review): the result of alloc_section () is used unchecked.
 */
2503 major_collection (void)
2505 GCMemSection *section, *prev_section;
2506 LOSObject *bigobj, *prevbo;
2511 TV_DECLARE (all_atv);
2512 TV_DECLARE (all_btv);
2515 /* FIXME: only use these values for the precise scan
2516 * note that to_space pointers should be excluded anyway...
2518 char *heap_start = NULL;
2519 char *heap_end = (char*)-1;
2520 size_t copy_space_required = 0;
2523 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2525 mono_stats.major_gc_count ++;
2527 /* Clear all remaining nursery fragments, pinning depends on this */
2528 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2529 g_assert (nursery_next <= nursery_frag_real_end);
2530 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
2531 for (frag = nursery_fragments; frag; frag = frag->next) {
2532 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2537 * FIXME: implement Mark/Compact
2538 * Until that is done, we can just apply mostly the same alg as for the nursery:
2539 * this means we need a big section to potentially copy all the other sections, so
2540 * it is not ideal specially with large heaps.
2542 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2543 collect_nursery (0);
2546 TV_GETTIME (all_atv);
2547 /* FIXME: make sure the nursery next_data ptr is updated */
2548 nursery_section->next_data = nursery_real_end;
2549 /* we should also coalesce scanning from sections close to each other
2550 * and deal with pointers outside of the sections later.
2552 /* The remsets are not useful for a major collection */
2554 /* world must be stopped already */
2556 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2557 for (section = section_list; section; section = section->next) {
2558 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2559 pin_from_roots (section->data, section->next_data);
2560 if (count != next_pin_slot) {
2562 optimize_pin_queue (count);
2563 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2564 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2565 section->pin_queue_end = next_pin_slot = count + reduced_to;
2567 copy_space_required += (char*)section->next_data - (char*)section->data;
2569 /* identify possible pointers to the insize of large objects */
2570 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2571 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2572 count = next_pin_slot;
2573 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2574 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2575 if (next_pin_slot != count) {
2576 next_pin_slot = count;
2577 pin_object (bigobj->data);
2578 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2581 /* look for pinned addresses for pinned-alloc objects */
2582 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2583 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2584 count = next_pin_slot;
2585 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2586 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2587 if (next_pin_slot != count) {
2588 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2589 next_pin_slot = count;
2594 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2595 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2597 /* allocate the big to space */
2598 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required));
2599 section = alloc_section (copy_space_required);
2600 to_space = gray_objects = gray_first = section->next_data;
2601 to_space_end = section->end_data;
2602 to_space_section = section;
2604 /* the old generation doesn't need to be scanned (no remembered sets or card
2605 * table needed either): the only objects that must survive are those pinned and
2606 * those referenced by the precise roots.
2607 * mark any section without pinned objects, so we can free it since we will be able to
2608 * move all the objects.
2610 /* the pinned objects are roots (big objects are included in this list, too) */
2611 for (i = 0; i < next_pin_slot; ++i) {
2612 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2613 scan_object (pin_queue [i], heap_start, heap_end);
2615 /* registered roots, this includes static fields */
2616 scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_NORMAL);
2617 scan_from_registered_roots (heap_start, heap_end, ROOT_TYPE_WBARRIER);
2618 /* alloc_pinned objects */
2619 scan_from_pinned_objects (heap_start, heap_end);
2620 /* scan the list of objects ready for finalization */
2621 scan_finalizer_entries (fin_ready_list, heap_start, heap_end);
2622 scan_finalizer_entries (critical_fin_list, heap_start, heap_end);
2624 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2626 /* we need to go over the big object list to see if any was marked and scan it
2627 * And we need to make this in a loop, considering that objects referenced by finalizable
2628 * objects could reference big objects (this happens in finish_gray_stack ())
2630 scan_needed_big_objects (heap_start, heap_end);
2631 /* all the objects in the heap */
2632 finish_gray_stack (heap_start, heap_end);
2634 /* sweep the big objects list */
2636 for (bigobj = los_object_list; bigobj;) {
2637 if (object_is_pinned (bigobj->data)) {
2638 unpin_object (bigobj->data);
2639 bigobj->scanned = FALSE;
2642 /* not referenced anywhere, so we can free it */
2644 prevbo->next = bigobj->next;
2646 los_object_list = bigobj->next;
2648 bigobj = bigobj->next;
2649 free_large_object (to_free);
2653 bigobj = bigobj->next;
2655 /* unpin objects from the pinned chunks and free the unmarked ones */
2656 sweep_pinned_objects ();
2658 /* free the unused sections */
2659 prev_section = NULL;
2660 for (section = section_list; section;) {
2661 /* to_space doesn't need handling here and the nursery is special */
2662 if (section == to_space_section || section == nursery_section) {
2663 prev_section = section;
2664 section = section->next;
2667 /* no pinning object, so the section is free */
2668 if (section->pin_queue_start == section->pin_queue_end) {
2669 GCMemSection *to_free;
2671 prev_section->next = section->next;
2673 section_list = section->next;
2675 section = section->next;
2676 free_mem_section (to_free);
2679 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2680 build_section_fragments (section);
2682 prev_section = section;
2683 section = section->next;
2686 /* walk the pin_queue, build up the fragment list of free memory, unmark
2687 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2690 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2692 TV_GETTIME (all_btv);
2693 mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2694 /* prepare the pin queue for the next collection */
2696 if (fin_ready_list || critical_fin_list) {
2697 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2698 mono_gc_finalize_notify ();
2703 * Allocate a new section of memory to be used as old generation.
/*
 * Create an old-generation section (role MEMORY_ROLE_GEN1) for a request of
 * @size bytes and link it at the head of section_list.
 * The size starts at next_section_size; for a larger request new_size is
 * rounded up to a multiple of pagesize. section_size_used counts the
 * sections created; once it exceeds three it is reset and
 * next_section_size is doubled, capped at max_section_size.
 * The heap boundaries and total_alloc are updated, and a scan_starts table
 * with one slot per SCAN_START_SIZE bytes is allocated.
 * NOTE(review): section->data and section->size are assigned twice with the
 * same values; the second pair is redundant.
 * NOTE(review): the results of get_internal_mem () and get_os_memory () are
 * not checked before use.
 */
2705 static GCMemSection*
2706 alloc_section (size_t size)
2708 GCMemSection *section;
2711 size_t new_size = next_section_size;
2713 if (size > next_section_size) {
2715 new_size += pagesize - 1;
2716 new_size &= ~(pagesize - 1);
2718 section_size_used++;
2719 if (section_size_used > 3) {
2720 section_size_used = 0;
2721 next_section_size *= 2;
2722 if (next_section_size > max_section_size)
2723 next_section_size = max_section_size;
2725 section = get_internal_mem (sizeof (GCMemSection));
2726 data = get_os_memory (new_size, TRUE);
2727 section->data = section->next_data = data;
2728 section->size = new_size;
2729 section->end_data = data + new_size;
2730 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2731 total_alloc += new_size;
2732 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc));
2733 section->data = data;
2734 section->size = new_size;
2735 scan_starts = new_size / SCAN_START_SIZE;
2736 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2737 section->num_scan_start = scan_starts;
2738 section->role = MEMORY_ROLE_GEN1;
2740 /* add to the section list */
2741 section->next = section_list;
2742 section_list = section;
/*
 * Return a section's data area to the OS with free_os_memory (), release
 * its GCMemSection record and subtract its size from total_alloc.
 * The section is not unlinked from section_list here; major_collection ()
 * does that before calling this function.
 * NOTE(review): section->scan_starts, allocated with get_internal_mem () in
 * alloc_section (), is not released by any statement here -- likely a leak.
 */
2748 free_mem_section (GCMemSection *section)
2750 char *data = section->data;
2751 size_t size = section->size;
2752 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size));
2753 free_os_memory (data, size);
2754 free_internal_mem (section);
2755 total_alloc -= size;
2759 * When deciding if it's better to collect or to expand, keep track
2760 * of how much garbage was reclaimed with the last collection: if it's too
2762 * This is called when we could not allocate a small object.
/*
 * Slow path taken when a small allocation of @size bytes cannot be
 * satisfied. Runs collect_nursery (size), then looks for a large enough
 * fragment with search_fragment_for_size (), which also sets the pointers
 * for the next allocation. When no fragment fits, the pinned objects left
 * by the collection (last_num_pinned entries of pin_queue) are logged.
 * do_minor_collection is initialised to 1 and no statement visible in this
 * excerpt changes it. The body of the `!nursery_section` branch is not
 * visible either.
 */
2764 static void __attribute__((noinline))
2765 minor_collect_or_expand_inner (size_t size)
2767 int do_minor_collection = 1;
2769 if (!nursery_section) {
2773 if (do_minor_collection) {
2775 collect_nursery (size);
2776 DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage));
2778 /* this also sets the proper pointers for the next allocation */
2779 if (!search_fragment_for_size (size)) {
2781 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2782 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned));
2783 for (i = 0; i < last_num_pinned; ++i) {
2784 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2789 //report_internal_mem_usage ();
2793 * ######################################################################
2794 * ######## Memory allocation from the OS
2795 * ######################################################################
2796 * This section of code deals with getting memory from the OS and
2797 * allocating memory for GC-internal data structures.
2798 * Internal memory can be handled with a freelist for small objects.
2802 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2803 * This must not require any lock.
/*
 * get_os_memory:
 * @size: bytes requested; rounded up here to a multiple of pagesize
 * @activate: non-zero requests MONO_MMAP_READ|MONO_MMAP_WRITE protection,
 * zero requests MONO_MMAP_NONE
 *
 * Obtains a private anonymous mapping from mono_valloc (), passing 0 as the
 * address argument.
 * NOTE(review): the return statement and any failure handling are not
 * visible in this listing; callers use the result as the mapping address.
 */
2806 get_os_memory (size_t size, int activate)
2809 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2811 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
/* round the request up to a whole number of pages */
2812 size += pagesize - 1;
2813 size &= ~(pagesize - 1);
2814 ptr = mono_valloc (0, size, prot_flags);
2819 * Free the memory returned by get_os_memory (), returning it to the OS.
/*
 * free_os_memory:
 * @addr: start of a mapping obtained from get_os_memory ()
 * @size: length of the mapping in bytes
 *
 * Unmaps the range with munmap (); its return value is not checked.
 * NOTE(review): get_os_memory () maps through mono_valloc () while this
 * calls munmap () directly - confirm the two are meant to pair up on every
 * supported platform.
 */
2822 free_os_memory (void *addr, size_t size)
2824 munmap (addr, size);
/*
 * report_pinned_chunk:
 * @chunk: chunk to describe
 * @seq: sequence number printed in the first line of the report
 *
 * Debugging aid that prints to stdout: the chunk address, its size and page
 * count, the number of pages whose page_sizes entry is zero, the length of
 * each non-empty free list, and an estimate of the free memory (free pages
 * plus free-list items).
 * NOTE(review): the counting statements and the free-list walk are not
 * visible in this listing.
 */
2831 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2833 int i, free_pages, num_free, free_mem;
2835 for (i = 0; i < chunk->num_pages; ++i) {
2836 if (!chunk->page_sizes [i])
2839 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2840 free_mem = FREELIST_PAGESIZE * free_pages;
2841 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2842 if (!chunk->free_list [i])
2845 p = chunk->free_list [i];
2850 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2851 free_mem += freelist_sizes [i] * num_free;
2853 printf ("\tfree memory in chunk: %d\n", free_mem);
/*
 * report_internal_mem_usage:
 *
 * Debugging aid: prints a report_pinned_chunk () summary for every chunk on
 * internal_chunk_list and then for every chunk on pinned_chunk_list.
 * Marked G_GNUC_UNUSED; the only reference visible in this listing is
 * commented out.
 */
2859 static G_GNUC_UNUSED void
2860 report_internal_mem_usage (void) {
2863 printf ("Internal memory usage:\n");
2865 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2866 report_pinned_chunk (chunk, i++);
2868 printf ("Pinned memory usage:\n");
2870 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2871 report_pinned_chunk (chunk, i++);
2876 * the array of pointers from @start to @end contains conservative
2877 * pointers to objects inside @chunk: mark each referenced object
/*
 * mark_pinned_from_addresses:
 * @chunk: pinned chunk the addresses point into
 * @start: first element of an array of conservative pointers
 * @end: one past the last element of that array
 *
 * For each address, finds the page it falls in and rounds the address down
 * to the start of its slot. Slot size comes from chunk->page_sizes; page 0
 * is measured from chunk->start_data because the chunk header lives there.
 * A slot whose first word is non-NULL and points outside the chunk data is
 * treated as a live object and logged as marked.
 * NOTE(review): the statement that actually marks the object and the
 * branches around the two rounding sequences are not visible in this
 * listing.
 * NOTE(review): the range test here uses chunk->start_data and '>' for the
 * upper bound, while sweep_pinned_objects () and scan_from_pinned_objects ()
 * use chunk and '>=' - confirm the difference is intended.
 */
2881 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2883 for (; start < end; start++) {
2884 char *addr = *start;
2885 int offset = (char*)addr - (char*)chunk;
2886 int page = offset / FREELIST_PAGESIZE;
2887 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2888 int slot_size = chunk->page_sizes [page];
2890 /* the page is not allocated */
2893 /* would be faster if we restrict the sizes to power of two,
2894 * but that's a waste of memory: need to measure. it could reduce
2895 * fragmentation since there are less pages needed, if for example
2896 * someone interns strings of each size we end up with one page per
2897 * interned string (still this is just ~40 KB): with more fine-grained sizes
2898 * this increases the number of used pages.
2901 obj_offset /= slot_size;
2902 obj_offset *= slot_size;
2903 addr = (char*)chunk->start_data + obj_offset;
2905 obj_offset /= slot_size;
2906 obj_offset *= slot_size;
2907 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2910 /* if the vtable is inside the chunk it's on the freelist, so skip */
2911 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2913 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
/*
 * sweep_pinned_objects:
 *
 * Walks every page of every chunk on pinned_chunk_list, stepping through
 * the page in obj_size slots (page 0 starts at chunk->start_data). A slot
 * whose first word is non-NULL and points outside the chunk is treated as
 * an object. Objects for which object_is_pinned () is true are logged as
 * unmarked; the others are only logged as about to be freed (see the FIXME:
 * they are not put back on a free list in the visible lines).
 * NOTE(review): the unpin call, the skip of pages with a zero size and the
 * pointer advance are not visible in this listing.
 */
2919 sweep_pinned_objects (void)
2926 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2927 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2928 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2929 for (i = 0; i < chunk->num_pages; ++i) {
2930 obj_size = chunk->page_sizes [i];
2933 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2934 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2935 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2936 while (p + obj_size <= endp) {
2938 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2939 /* if the first word (the vtable) is outside the chunk we have an object */
2940 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2941 if (object_is_pinned (ptr)) {
2943 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2945 /* FIXME: add to freelist */
2946 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
/*
 * scan_from_pinned_objects:
 * @addr_start: first scan_object () range argument, passed through unchanged
 * @addr_end: second scan_object () range argument, passed through unchanged
 *
 * Walks every page of every chunk on pinned_chunk_list with the same slot
 * iteration as sweep_pinned_objects () and calls scan_object () on each slot
 * whose first word is non-NULL and points outside the chunk.
 * NOTE(review): the skip of pages with a zero size and the pointer advance
 * are not visible in this listing.
 */
2956 scan_from_pinned_objects (char *addr_start, char *addr_end)
2963 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2964 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2965 DEBUG (6, fprintf (gc_debug_file, "Scanning pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2966 for (i = 0; i < chunk->num_pages; ++i) {
2967 obj_size = chunk->page_sizes [i];
2970 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2971 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2972 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2973 while (p + obj_size <= endp) {
2975 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2976 /* if the first word (the vtable) is outside the chunk we have an object */
2977 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2978 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of alloc_pinned %p (%s)\n", i, ptr, safe_name (ptr)));
2979 // FIXME: Put objects without references into separate chunks
2980 // which do not need to be scanned
2981 scan_object ((char*)ptr, addr_start, addr_end);
2990 * Find the slot number in the freelist for memory chunks that
2991 * can contain @size objects.
/*
 * slot_for_size:
 * @size: object size in bytes
 *
 * Linear search over freelist_sizes for the first slot whose size is at
 * least @size. Falling out of the loop hits g_assert_not_reached (), so
 * callers must not pass a size larger than every entry in freelist_sizes.
 * NOTE(review): the return statement is not visible in this listing.
 */
2994 slot_for_size (size_t size)
2997 /* do a binary search or lookup table later. */
2998 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2999 if (freelist_sizes [slot] >= size)
3002 g_assert_not_reached ();
3007 * Build a free list for @size memory chunks from the memory area between
3008 * start_page and end_page.
/*
 * build_freelist:
 * @chunk: chunk that owns the page
 * @slot: index into chunk->free_list to populate
 * @size: size in bytes of each item
 * @start_page: first byte usable for items
 * @end_page: one past the last byte of the page
 *
 * Asserts that the slot's list is empty, makes @start_page its head and
 * threads a singly linked list through the page by storing in each item
 * the address of the item @size bytes further on.
 * NOTE(review): the pointer advance and the termination of the last item
 * are not visible in this listing. `end` is already end_page - size and the
 * loop additionally requires p + size <= end - confirm the last slot of the
 * page is meant to be left out of the list.
 */
3011 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
3015 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
3016 p = (void**)start_page;
3017 end = (void**)(end_page - size);
3018 g_assert (!chunk->free_list [slot]);
3019 chunk->free_list [slot] = p;
3020 while ((char*)p + size <= (char*)end) {
3022 *p = (void*)((char*)p + size);
3026 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
/*
 * alloc_pinned_chunk:
 * @size: size of the allocation that triggered the new chunk
 *
 * Maps a new PinnedChunk of at least PINNED_CHUNK_MIN_SIZE * 2 bytes
 * (@size plus one page, rounded up to a page multiple) and lays out its
 * header in place: the page_sizes array starts at the offset of the `data`
 * field, followed by the free_list array and then start_data, each aligned
 * to ALLOC_ALIGN. The rest of page 0 becomes a free list of
 * PINNED_FIRST_SLOT_SIZE items. Also updates total_alloc, the heap
 * boundaries and min/max_pinned_chunk_addr.
 * NOTE(review): the return statement and any failure check on
 * get_os_memory () are not visible in this listing. The chunk is not linked
 * into any list here; the visible callers do that.
 */
3030 alloc_pinned_chunk (size_t size)
3035 size += pagesize; /* at least one page */
3036 size += pagesize - 1;
3037 size &= ~(pagesize - 1);
3038 if (size < PINNED_CHUNK_MIN_SIZE * 2)
3039 size = PINNED_CHUNK_MIN_SIZE * 2;
3040 chunk = get_os_memory (size, TRUE);
3041 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
3042 total_alloc += size;
3044 /* setup the bookkeeping fields */
3045 chunk->num_pages = size / FREELIST_PAGESIZE;
3046 offset = G_STRUCT_OFFSET (PinnedChunk, data);
3047 chunk->page_sizes = (void*)((char*)chunk + offset);
3048 offset += sizeof (int) * chunk->num_pages;
3049 offset += ALLOC_ALIGN - 1;
3050 offset &= ~(ALLOC_ALIGN - 1);
3051 chunk->free_list = (void*)((char*)chunk + offset);
3052 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
3053 offset += ALLOC_ALIGN - 1;
3054 offset &= ~(ALLOC_ALIGN - 1);
3055 chunk->start_data = (void*)((char*)chunk + offset);
3057 /* allocate the first page to the freelist */
3058 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
3059 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
3060 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size));
3061 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
3062 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
/*
 * get_chunk_freelist:
 * @chunk: chunk to allocate from
 * @slot: free-list slot (index into freelist_sizes)
 *
 * Pops an item from the chunk's free list for @slot. When that is not
 * possible it looks for a page to dedicate to freelist_sizes [slot],
 * records the size in page_sizes, builds a free list over that page and
 * pops from it.
 * NOTE(review): the conditions guarding the two pops, the handling of pages
 * already in use and all return statements are not visible in this listing.
 */
3066 /* assumes freelist for slot is empty, so try to alloc a new page */
3068 get_chunk_freelist (PinnedChunk *chunk, int slot)
3072 p = chunk->free_list [slot];
3074 chunk->free_list [slot] = *p;
3077 for (i = 0; i < chunk->num_pages; ++i) {
3079 if (chunk->page_sizes [i])
3081 size = freelist_sizes [slot];
3082 chunk->page_sizes [i] = size;
3083 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
3087 p = chunk->free_list [slot];
3089 chunk->free_list [slot] = *p;
/*
 * alloc_from_freelist:
 * @size: size in bytes of the pinned allocation
 *
 * Allocates one item of the slot chosen by slot_for_size () from the chunks
 * on pinned_chunk_list, in three stages: pop from an existing free list,
 * then get_chunk_freelist () on each existing chunk, and finally a new
 * chunk from alloc_pinned_chunk () pushed on the head of the list.
 * NOTE(review): the tests on p/res and the return statements are not
 * visible in this listing. The memory is not cleared here;
 * mono_gc_alloc_pinned_obj () clears it after the call.
 */
3096 alloc_from_freelist (size_t size)
3100 PinnedChunk *pchunk;
3101 slot = slot_for_size (size);
3102 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
3103 g_assert (size <= freelist_sizes [slot]);
3104 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
3105 void **p = pchunk->free_list [slot];
3107 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
3108 pchunk->free_list [slot] = *p;
3112 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
3113 res = get_chunk_freelist (pchunk, slot);
3117 pchunk = alloc_pinned_chunk (size);
3118 /* FIXME: handle OOM */
3119 pchunk->next = pinned_chunk_list;
3120 pinned_chunk_list = pchunk;
3121 res = get_chunk_freelist (pchunk, slot);
/*
 * get_internal_mem:
 * @size: number of bytes needed for a GC-internal data structure
 *
 * The first visible statement returns zeroed memory from calloc ().
 * The code after it mirrors alloc_from_freelist () but works on
 * internal_chunk_list instead of pinned_chunk_list.
 * NOTE(review): no preprocessor lines are visible in this listing, so it
 * cannot be told from here whether the free-list code after the calloc ()
 * return is compiled out or simply unreachable.
 */
3125 /* used for the GC-internal data structures */
3126 /* FIXME: add support for bigger sizes by allocating more than one page
3130 get_internal_mem (size_t size)
3132 return calloc (1, size);
3136 PinnedChunk *pchunk;
3137 slot = slot_for_size (size);
3138 g_assert (size <= freelist_sizes [slot]);
3139 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3140 void **p = pchunk->free_list [slot];
3142 pchunk->free_list [slot] = *p;
3146 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3147 res = get_chunk_freelist (pchunk, slot);
3151 pchunk = alloc_pinned_chunk (size);
3152 /* FIXME: handle OOM */
3153 pchunk->next = internal_chunk_list;
3154 internal_chunk_list = pchunk;
3155 res = get_chunk_freelist (pchunk, slot);
/*
 * free_internal_mem:
 * @addr: memory previously returned by get_internal_mem ()
 *
 * The visible code finds the chunk on internal_chunk_list whose page range
 * contains @addr, derives the slot from the size recorded for that page and
 * pushes @addr on the head of that slot's free list. If no chunk contains
 * @addr it prints a message and hits g_assert_not_reached ().
 * NOTE(review): the first statements of the body are not visible in this
 * listing; since get_internal_mem () visibly returns calloc () memory,
 * confirm which release path is actually live.
 */
3161 free_internal_mem (void *addr)
3165 PinnedChunk *pchunk;
3166 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3167 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
3168 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
3169 int offset = (char*)addr - (char*)pchunk;
3170 int page = offset / FREELIST_PAGESIZE;
3171 int slot = slot_for_size (pchunk->page_sizes [page]);
3173 *p = pchunk->free_list [slot];
3174 pchunk->free_list [slot] = p;
3178 printf ("free of %p failed\n", addr);
3179 g_assert_not_reached ();
3184 * ######################################################################
3185 * ######## Object allocation
3186 * ######################################################################
3187 * This section of code deals with allocating memory for objects.
3188 * There are several ways:
3189 * *) allocate large objects
3190 * *) allocate normal objects
3191 * *) fast lock-free allocation
3192 * *) allocation of pinned objects
/*
 * free_large_object:
 * @obj: large-object header to release
 *
 * Subtracts the object size from los_memory_usage, then recomputes the
 * mapping size the same way alloc_large_inner () does (object size plus
 * sizeof (LOSObject), rounded up to a page multiple), subtracts that from
 * total_alloc and returns the mapping to the OS.
 * Nothing in the visible lines unlinks @obj from los_object_list.
 */
3196 free_large_object (LOSObject *obj)
3198 size_t size = obj->size;
3199 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size));
3201 los_memory_usage -= size;
3202 size += sizeof (LOSObject);
3203 size += pagesize - 1;
3204 size &= ~(pagesize - 1);
3205 total_alloc -= size;
3207 free_os_memory (obj, size);
3211 * Objects with size >= 64KB are allocated in the large object space.
3212 * They are currently kept track of with a linked list.
3213 * They don't move, so there is no need to pin them during collection
3214 * and we avoid the memcpy overhead.
/*
 * alloc_large_inner:
 * @vtable: vtable of the object being allocated
 * @size: object size in bytes
 *
 * Allocates a large object in its own OS mapping. When los_memory_usage
 * exceeds next_los_collection a major collection runs first and the
 * threshold is reset to the post-collection usage plus 5 MB. The mapping
 * holds the LOSObject header plus the object, rounded up to a page
 * multiple; the new object is pushed on the head of los_object_list and
 * los_memory_usage grows by @size.
 * NOTE(review): the initialisation of alloc_size, the stores of the size
 * and the vtable into the new object and the return statement are not
 * visible in this listing.
 */
3216 static void* __attribute__((noinline))
3217 alloc_large_inner (MonoVTable *vtable, size_t size)
3222 int just_did_major_gc = FALSE;
3224 if (los_memory_usage > next_los_collection) {
3225 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection));
3226 just_did_major_gc = TRUE;
3228 major_collection ();
3230 /* later increase based on a percent of the heap size */
3231 next_los_collection = los_memory_usage + 5*1024*1024;
3234 alloc_size += sizeof (LOSObject);
3235 alloc_size += pagesize - 1;
3236 alloc_size &= ~(pagesize - 1);
3237 /* FIXME: handle OOM */
3238 obj = get_os_memory (alloc_size, TRUE);
3240 vtslot = (void**)obj->data;
3242 total_alloc += alloc_size;
3243 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
3244 obj->next = los_object_list;
3245 los_object_list = obj;
3246 los_memory_usage += size;
3248 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size));
/*
 * search_fragment_for_size:
 * @size: number of bytes the caller needs in one contiguous fragment
 *
 * First-fit search of the nursery_fragments list. Before searching, any
 * unused tail of the current fragment is zeroed when the clear policy is
 * CLEAR_AT_TLAB_CREATION. On a hit the fragment is unlinked, nursery_next
 * and nursery_frag_real_end are pointed at its bounds, and the Fragment
 * record is recycled onto fragment_freelist.
 * NOTE(review): the maintenance of `prev` and the return statements are
 * not visible in this listing.
 */
3252 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
3253 * an object of size @size
3254 * Return FALSE if not found (which means we need a collection)
3257 search_fragment_for_size (size_t size)
3259 Fragment *frag, *prev;
3260 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size));
3262 if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3263 /* Clear the remaining space, pinning depends on this */
3264 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
3267 for (frag = nursery_fragments; frag; frag = frag->next) {
3268 if (size <= (frag->fragment_end - frag->fragment_start)) {
3269 /* remove from the list */
3271 prev->next = frag->next;
3273 nursery_fragments = frag->next;
3274 nursery_next = frag->fragment_start;
3275 nursery_frag_real_end = frag->fragment_end;
3277 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
3278 frag->next = fragment_freelist;
3279 fragment_freelist = frag;
3288 * size is already rounded up and we hold the GC lock.
/*
 * alloc_degraded:
 * @vtable: vtable of the object being allocated
 * @size: object size, already rounded up by the caller
 *
 * Allocates directly from a non-nursery section: the first section on
 * section_list with at least @size bytes between next_data and end_data is
 * used, otherwise a new section four times the nursery size is created.
 * The section's next_data is bumped by @size and @size is added to
 * degraded_mode, which therefore counts the bytes handed out this way.
 * NOTE(review): the exit from the search loop, the vtable store and the
 * return statement are not visible in this listing.
 */
3291 alloc_degraded (MonoVTable *vtable, size_t size)
3293 GCMemSection *section;
3295 for (section = section_list; section; section = section->next) {
3296 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
3297 p = (void**)section->next_data;
3302 section = alloc_section (nursery_section->size * 4);
3303 /* FIXME: handle OOM */
3304 p = (void**)section->next_data;
3306 section->next_data += size;
3307 degraded_mode += size;
3308 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
3314 * Provide a variant that takes just the vtable for small fixed-size objects.
3315 * The aligned size is already computed and stored in vt->gc_descr.
3316 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
3317 * processing. We can keep track of where objects start, for example,
3318 * so when we scan the thread stacks for pinned objects, we can start
3319 * a search for the pinned object in SCAN_START_SIZE chunks.
/*
 * mono_gc_alloc_obj:
 * @vtable: vtable of the object to allocate; its gc_descr must be set
 * @size: object size in bytes; rounded up here to ALLOC_ALIGN
 *
 * Main object allocation entry point. Visible structure:
 *  - with collect_before_allocs set, a nursery collection is forced before
 *    the allocation;
 *  - fast path: bump tlab_next by @size and succeed if the new value is
 *    still below tlab_temp_end;
 *  - objects larger than MAX_SMALL_OBJ_SIZE go to alloc_large_inner ();
 *  - when the TLAB is exhausted (tlab_next >= tlab_real_end) the object is
 *    allocated in degraded mode, directly from the nursery (if it is larger
 *    than tlab_size), or from a freshly carved TLAB; each of the last two
 *    may run minor_collect_or_expand_inner () and fall back to
 *    alloc_degraded ();
 *  - otherwise only tlab_temp_end was reached: the scan start is recorded
 *    and tlab_temp_end is advanced by up to SCAN_START_SIZE.
 * NOTE(review): locking, the vtable stores, the rollback of tlab_next after
 * a failed fast path and every return statement are not visible in this
 * listing, so the exact control flow between the stages above must be
 * confirmed against the complete source.
 */
3322 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
3324 /* FIXME: handle OOM */
3329 size += ALLOC_ALIGN - 1;
3330 size &= ~(ALLOC_ALIGN - 1);
3332 g_assert (vtable->gc_descr);
3334 if (G_UNLIKELY (collect_before_allocs)) {
3337 if (nursery_section) {
3340 update_current_thread_stack (&dummy);
3342 collect_nursery (0);
3344 if (!degraded_mode && !search_fragment_for_size (size)) {
3346 g_assert_not_reached ();
3352 /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */
3354 p = (void**)tlab_next;
3355 /* FIXME: handle overflow */
3356 new_next = (char*)p + size;
3357 tlab_next = new_next;
3359 if (G_LIKELY (new_next < tlab_temp_end)) {
3363 * FIXME: We might need a memory barrier here so the change to tlab_next is
3364 * visible before the vtable store.
3367 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3375 /* there are two cases: the object is too big or we run out of space in the TLAB */
3376 /* we also reach here when the thread does its first allocation after a minor
3377 * collection, since the tlab_ variables are initialized to NULL.
3378 * there can be another case (from ORP), if we cooperate with the runtime a bit:
3379 * objects that need finalizers can have the high bit set in their size
3380 * so the above check fails and we can readily add the object to the queue.
3381 * This avoids taking again the GC lock when registering, but this is moot when
3382 * doing thread-local allocation, so it may not be a good idea.
3385 if (size > MAX_SMALL_OBJ_SIZE) {
3386 /* get ready for possible collection */
3387 update_current_thread_stack (&dummy);
3389 p = alloc_large_inner (vtable, size);
3391 if (tlab_next >= tlab_real_end) {
3393 * Run out of space in the TLAB. When this happens, some amount of space
3394 * remains in the TLAB, but not enough to satisfy the current allocation
3395 * request. Currently, we retire the TLAB in all cases, later we could
3396 * keep it if the remaining space is above a treshold, and satisfy the
3397 * allocation directly from the nursery.
3400 /* when running in degraded mode, we continue allocing that way
3401 * for a while, to decrease the number of useless nursery collections.
3403 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
3404 p = alloc_degraded (vtable, size);
3409 if (size > tlab_size) {
3410 /* Allocate directly from the nursery */
3411 if (nursery_next + size >= nursery_frag_real_end) {
3412 if (!search_fragment_for_size (size)) {
3413 /* get ready for possible collection */
3414 update_current_thread_stack (&dummy);
3415 minor_collect_or_expand_inner (size);
3416 if (degraded_mode) {
3417 p = alloc_degraded (vtable, size);
3424 p = (void*)nursery_next;
3425 nursery_next += size;
3426 if (nursery_next > nursery_frag_real_end) {
3431 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3432 memset (p, 0, size);
3435 DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size)));
3437 if (nursery_next + tlab_size >= nursery_frag_real_end) {
3438 res = search_fragment_for_size (tlab_size);
3440 /* get ready for possible collection */
3441 update_current_thread_stack (&dummy);
3442 minor_collect_or_expand_inner (tlab_size);
3443 if (degraded_mode) {
3444 p = alloc_degraded (vtable, size);
3451 /* Allocate a new TLAB from the current nursery fragment */
3452 tlab_start = nursery_next;
3453 nursery_next += tlab_size;
3454 tlab_next = tlab_start;
3455 tlab_real_end = tlab_start + tlab_size;
3456 tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size);
3458 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3459 memset (tlab_start, 0, tlab_size);
3461 /* Allocate from the TLAB */
3462 p = (void*)tlab_next;
3464 g_assert (tlab_next <= tlab_real_end);
3466 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3469 /* Reached tlab_temp_end */
3471 /* record the scan start so we can find pinned objects more easily */
3472 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3473 /* we just bump tlab_temp_end as well */
3474 tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE);
3475 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end));
3479 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3488 * To be used for interned strings and possibly MonoThread, reflection handles.
3489 * We may want to explicitly free these objects.
/*
 * mono_gc_alloc_pinned_obj:
 * @vtable: vtable of the object to allocate
 * @size: object size in bytes; rounded up here to ALLOC_ALIGN
 *
 * Allocates an object that the collector will not move. Sizes above
 * MAX_FREELIST_SIZE are delegated to alloc_large_inner (); smaller ones
 * come from the pinned free lists and are zeroed here because
 * alloc_from_freelist () does not clear recycled memory.
 * NOTE(review): locking, the vtable store and the return statement are not
 * visible in this listing.
 */
3492 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
3494 /* FIXME: handle OOM */
3496 size += ALLOC_ALIGN - 1;
3497 size &= ~(ALLOC_ALIGN - 1);
3499 if (size > MAX_FREELIST_SIZE) {
3500 update_current_thread_stack (&p);
3501 /* large objects are always pinned anyway */
3502 p = alloc_large_inner (vtable, size);
3504 p = alloc_from_freelist (size);
3505 memset (p, 0, size);
3507 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3514 * ######################################################################
3515 * ######## Finalization support
3516 * ######################################################################
3520 * this is valid for the nursery: if the object has been forwarded it means it's
3521 * still referenced from a root. If it is pinned it's still alive as well.
3522 * Return TRUE if @obj is ready to be finalized.
3524 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
/*
 * is_critical_finalizer:
 * @entry: finalization entry to classify
 *
 * Reports whether the class of entry->object has
 * mono_defaults.critical_finalizer_object as a parent, using the class
 * found through the object's vtable.
 * NOTE(review): the value returned when critical_finalizer_object is not
 * set is not visible in this listing.
 */
3527 is_critical_finalizer (FinalizeEntry *entry)
3532 if (!mono_defaults.critical_finalizer_object)
3535 obj = entry->object;
3536 class = ((MonoVTable*)LOAD_VTABLE (obj))->klass;
3538 return mono_class_has_parent (class, mono_defaults.critical_finalizer_object);
/*
 * queue_finalization_entry:
 * @entry: entry already removed from the finalizable hash table
 *
 * Pushes @entry on the head of critical_fin_list when
 * is_critical_finalizer () says so, otherwise on the head of
 * fin_ready_list.
 */
3542 queue_finalization_entry (FinalizeEntry *entry) {
3543 if (is_critical_finalizer (entry)) {
3544 entry->next = critical_fin_list;
3545 critical_fin_list = entry;
3547 entry->next = fin_ready_list;
3548 fin_ready_list = entry;
/*
 * finalize_in_range:
 * @start: lowest address of the area being collected
 * @end: one past the highest address of that area
 *
 * Walks every bucket of finalizable_hash. Only entries whose object lies in
 * [@start, @end) and outside [to_space, to_space_end) are considered.
 * An object that is fin-ready (neither pinned nor forwarded) has its entry
 * unlinked from the table and queued for finalization, and the object is
 * copied with copy_object () so it survives until its finalizer runs.
 * For any other object in range the entry is only updated with the result
 * of copy_object ().
 * NOTE(review): the maintenance of prev/next inside the loop is not visible
 * in this listing.
 */
3553 finalize_in_range (char *start, char *end)
3555 FinalizeEntry *entry, *prev;
3559 for (i = 0; i < finalizable_hash_size; ++i) {
3561 for (entry = finalizable_hash [i]; entry;) {
3562 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3563 if (object_is_fin_ready (entry->object)) {
3565 FinalizeEntry *next;
3566 /* remove and put in fin_ready_list */
3568 prev->next = entry->next;
3570 finalizable_hash [i] = entry->next;
3572 num_ready_finalizers++;
3573 num_registered_finalizers--;
3574 queue_finalization_entry (entry);
3575 /* Make it survive */
3576 from = entry->object;
3577 entry->object = copy_object (entry->object, start, end);
3578 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3582 /* update pointer */
3583 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3584 entry->object = copy_object (entry->object, start, end);
3588 entry = entry->next;
/*
 * null_link_in_range:
 * @start: lowest address of the area being collected
 * @end: one past the highest address of that area
 *
 * Walks every bucket of disappearing_link_hash. For links whose target
 * lies in [@start, @end) and outside [to_space, to_space_end): if the
 * target is fin-ready the entry is unlinked from the table and freed and
 * num_disappearing_links is decremented; otherwise the link is rewritten
 * with the hidden form of the result of copy_object ().
 * NOTE(review): the store that clears the link itself and the maintenance
 * of prev are not visible in this listing.
 */
3594 null_link_in_range (char *start, char *end)
3596 DisappearingLink *entry, *prev;
3598 for (i = 0; i < disappearing_link_hash_size; ++i) {
3600 for (entry = disappearing_link_hash [i]; entry;) {
3601 char *object = DISLINK_OBJECT (entry);
3602 if (object >= start && object < end && (object < to_space || object >= to_space_end)) {
3603 if (object_is_fin_ready (object)) {
3604 void **p = entry->link;
3605 DisappearingLink *old;
3607 /* remove from list */
3609 prev->next = entry->next;
3611 disappearing_link_hash [i] = entry->next;
3612 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, object));
3614 free_internal_mem (entry);
3616 num_disappearing_links--;
3619 /* update pointer if it's moved
3620 * FIXME: what if an object is moved earlier?
3622 *entry->link = HIDE_POINTER (copy_object (object, start, end));
3623 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->link, DISLINK_OBJECT (entry)));
3627 entry = entry->next;
3633 * mono_gc_finalizers_for_domain:
3634 * @domain: the unloading appdomain
3635 * @out_array: output array
3636 * @out_size: size of output array
3638 * Store inside @out_array up to @out_size objects that belong to the unloading
3639 * appdomain @domain. Returns the number of stored items. Can be called repeatedly
3640 * until it returns 0.
3641 * The items are removed from the finalizer data structure, so the caller is supposed
3643 * @out_array should be on the stack to allow the GC to know the objects are still alive.
3646 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3648 FinalizeEntry *entry, *prev;
3650 if (no_finalize || !out_size || !out_array)
3654 for (i = 0; i < finalizable_hash_size; ++i) {
3656 for (entry = finalizable_hash [i]; entry;) {
3657 if (mono_object_domain (entry->object) == domain) {
3658 FinalizeEntry *next;
3659 /* remove and put in out_array */
/*
 * Each entry whose object belongs to @domain is unlinked from its bucket,
 * num_registered_finalizers is decremented and the object is appended to
 * @out_array.
 * NOTE(review): the release of the unlinked entry, the maintenance of
 * prev/next, the locking and the return statements are not visible in
 * this listing.
 */
3661 prev->next = entry->next;
3663 finalizable_hash [i] = entry->next;
3665 num_registered_finalizers--;
3666 out_array [count ++] = entry->object;
3667 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
/* the output array is full: stop here, the caller may call again for the remaining objects */
3669 if (count == out_size) {
3676 entry = entry->next;
/*
 * rehash_fin_table:
 *
 * Resizes finalizable_hash to g_spaced_primes_closest
 * (num_registered_finalizers) buckets, re-inserting each entry at the head
 * of the bucket selected by mono_object_hash () of its object, then frees
 * the old bucket array and installs the new one.
 * Relies on get_internal_mem () returning zeroed memory for the new bucket
 * array, since entries are chained onto new_hash [hash] without clearing
 * it first.
 * NOTE(review): the statement saving entry->next before relinking is not
 * visible in this listing.
 */
3684 rehash_fin_table (void)
3688 FinalizeEntry **new_hash;
3689 FinalizeEntry *entry, *next;
3690 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3692 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3693 for (i = 0; i < finalizable_hash_size; ++i) {
3694 for (entry = finalizable_hash [i]; entry; entry = next) {
3695 hash = mono_object_hash (entry->object) % new_size;
3697 entry->next = new_hash [hash];
3698 new_hash [hash] = entry;
3701 free_internal_mem (finalizable_hash);
3702 finalizable_hash = new_hash;
3703 finalizable_hash_size = new_size;
/*
 * mono_gc_register_for_finalization:
 * @obj: object to register or deregister
 * @user_data: must be NULL or mono_gc_run_finalize (asserted)
 *
 * Maintains the finalizable_hash table keyed by mono_object_hash (@obj).
 * The table is rehashed first when the number of registered finalizers has
 * reached twice the bucket count. If @obj is already in its bucket the
 * visible code unlinks and frees that entry; otherwise a new entry is
 * pushed on the head of the bucket and num_registered_finalizers grows.
 * NOTE(review): the tests on @user_data that choose between removal and
 * registration, the maintenance of prev, the locking and the return
 * statements are not visible in this listing.
 */
3707 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3709 FinalizeEntry *entry, *prev;
3713 g_assert (user_data == NULL || user_data == mono_gc_run_finalize);
3714 hash = mono_object_hash (obj);
3716 if (num_registered_finalizers >= finalizable_hash_size * 2)
3717 rehash_fin_table ();
/* reduce the hash only after a possible rehash, since the table size may just have changed */
3718 hash %= finalizable_hash_size;
3720 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3721 if (entry->object == obj) {
3723 /* remove from the list */
3725 prev->next = entry->next;
3727 finalizable_hash [hash] = entry->next;
3728 num_registered_finalizers--;
3729 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3730 free_internal_mem (entry);
3738 /* request to deregister, but already out of the list */
3742 entry = get_internal_mem (sizeof (FinalizeEntry));
3743 entry->object = obj;
3744 entry->next = finalizable_hash [hash];
3745 finalizable_hash [hash] = entry;
3746 num_registered_finalizers++;
3747 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
/*
 * rehash_dislink:
 *
 * Resizes disappearing_link_hash to g_spaced_primes_closest
 * (num_disappearing_links) buckets, re-inserting each entry at the head of
 * the bucket selected by mono_aligned_addr_hash () of its link address,
 * then frees the old bucket array and installs the new one.
 * Relies on get_internal_mem () returning zeroed memory for the new bucket
 * array.
 * NOTE(review): the statement saving entry->next before relinking is not
 * visible in this listing.
 */
3752 rehash_dislink (void)
3756 DisappearingLink **new_hash;
3757 DisappearingLink *entry, *next;
3758 int new_size = g_spaced_primes_closest (num_disappearing_links);
3760 new_hash = get_internal_mem (new_size * sizeof (DisappearingLink*));
3761 for (i = 0; i < disappearing_link_hash_size; ++i) {
3762 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3763 hash = mono_aligned_addr_hash (entry->link) % new_size;
3765 entry->next = new_hash [hash];
3766 new_hash [hash] = entry;
3769 free_internal_mem (disappearing_link_hash);
3770 disappearing_link_hash = new_hash;
3771 disappearing_link_hash_size = new_size;
/*
 * mono_gc_register_disappearing_link:
 * @obj: object the link refers to; per the existing comment, NULL means
 * remove the link
 * @link: address of the location that holds the hidden pointer
 *
 * Maintains the disappearing_link_hash table keyed by the address @link.
 * If @link is already registered the visible code either unlinks and frees
 * its entry or stores the hidden form of the new @obj in *@link. Otherwise
 * a new entry is allocated, *@link is set to HIDE_POINTER (@obj) and the
 * entry is pushed on the head of its bucket.
 * NOTE(review): the rehash call under the load check, the test on @obj,
 * the store of @link into the new entry, the locking and the return
 * statements are not visible in this listing.
 */
3775 mono_gc_register_disappearing_link (MonoObject *obj, void **link)
3777 DisappearingLink *entry, *prev;
3781 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3783 /* FIXME: add check that link is not in the heap */
3784 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3785 entry = disappearing_link_hash [hash];
3787 for (; entry; entry = entry->next) {
3788 /* link already added */
3789 if (link == entry->link) {
3790 /* NULL obj means remove */
3793 prev->next = entry->next;
3795 disappearing_link_hash [hash] = entry->next;
3796 num_disappearing_links--;
3797 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3798 free_internal_mem (entry);
3801 *link = HIDE_POINTER (obj); /* we allow the change of object */
3808 entry = get_internal_mem (sizeof (DisappearingLink));
3809 *link = HIDE_POINTER (obj);
3811 entry->next = disappearing_link_hash [hash];
3812 disappearing_link_hash [hash] = entry;
3813 num_disappearing_links++;
3814 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
/*
 * mono_gc_invoke_finalizers:
 *
 * Runs pending finalizers until both fin_ready_list and critical_fin_list
 * are empty. Each iteration first unlinks and frees the entry finalized by
 * the previous iteration (either at the head of its list or after walking
 * to its predecessor), then picks the first entry with a non-NULL object,
 * trying fin_ready_list before critical_fin_list. The chosen entry stays on
 * its list with entry->object cleared while mono_gc_run_finalize () runs on
 * the local copy of the object pointer.
 * NOTE(review): the locking around the list manipulation, the tests that
 * select between the branches and the return value are not visible in this
 * listing.
 */
3819 mono_gc_invoke_finalizers (void)
3821 FinalizeEntry *entry = NULL;
3822 gboolean entry_is_critical;
3825 /* FIXME: batch to reduce lock contention */
3826 while (fin_ready_list || critical_fin_list) {
3830 FinalizeEntry **list = entry_is_critical ? &critical_fin_list : &fin_ready_list;
3832 /* We have finalized entry in the last
3833 interation, now we need to remove it from
3836 *list = entry->next;
3838 FinalizeEntry *e = *list;
3839 while (e->next != entry)
3841 e->next = entry->next;
3843 free_internal_mem (entry);
3847 /* Now look for the first non-null entry. */
3848 for (entry = fin_ready_list; entry && !entry->object; entry = entry->next)
3851 entry_is_critical = FALSE;
3853 entry_is_critical = TRUE;
3854 for (entry = critical_fin_list; entry && !entry->object; entry = entry->next)
3859 g_assert (entry->object);
3860 num_ready_finalizers--;
3861 obj = entry->object;
3862 entry->object = NULL;
3863 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
3871 g_assert (entry->object == NULL);
3873 /* the object is on the stack so it is pinned */
3874 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3875 mono_gc_run_finalize (obj, NULL);
/*
 * mono_gc_pending_finalizers:
 *
 * Returns non-zero when fin_ready_list or critical_fin_list is non-empty.
 * Both lists are read without any locking in the visible line.
 */
3882 mono_gc_pending_finalizers (void)
3884 return fin_ready_list || critical_fin_list;
3887 /* Negative value to remove */
/*
 * mono_gc_add_memory_pressure:
 * @value: amount to add to the memory_pressure counter; a negative value
 * removes pressure
 *
 * The update is a plain read-modify-write of memory_pressure; the FIXME
 * below notes that interlocked functions should be used instead.
 * NOTE(review): any locking around the update is not visible in this
 * listing.
 */
3889 mono_gc_add_memory_pressure (gint64 value)
3891 /* FIXME: Use interlocked functions */
3893 memory_pressure += value;
3898 * ######################################################################
3899 * ######## registered roots support
3900 * ######################################################################
/*
 * rehash_roots:
 * @pinned: index of the roots table to resize (used to index roots_hash,
 * roots_hash_size and num_roots_entries)
 *
 * Resizes the selected roots table to g_spaced_primes_closest
 * (num_roots_entries [pinned]) buckets, re-inserting each record at the
 * head of the bucket selected by mono_aligned_addr_hash () of its
 * start_root, then frees the old bucket array and installs the new one.
 * NOTE(review): the parameter is typed gboolean but the visible caller
 * loops over every root type up to ROOT_TYPE_NUM - confirm the intended
 * type. The statement saving entry->next before relinking is not visible
 * in this listing.
 */
3904 rehash_roots (gboolean pinned)
3908 RootRecord **new_hash;
3909 RootRecord *entry, *next;
3912 new_size = g_spaced_primes_closest (num_roots_entries [pinned]);
3913 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3914 for (i = 0; i < roots_hash_size [pinned]; ++i) {
3915 for (entry = roots_hash [pinned][i]; entry; entry = next) {
3916 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3918 entry->next = new_hash [hash];
3919 new_hash [hash] = entry;
3922 free_internal_mem (roots_hash [pinned]);
3923 roots_hash [pinned] = new_hash;
3924 roots_hash_size [pinned] = new_size;
/*
 * find_root:
 * @root_type: which roots table to search
 * @start: start address of the root being looked up
 * @addr_hash: mono_aligned_addr_hash () of @start, computed by the caller
 *
 * Searches one bucket of roots_hash [root_type] for a record whose
 * start_root equals @start. The bucket index is @addr_hash modulo the table
 * size, so roots_hash_size [root_type] must be non-zero.
 * NOTE(review): the return statements are not visible in this listing; the
 * caller treats the result as the matching record or none.
 */
3928 find_root (int root_type, char *start, guint32 addr_hash)
3930 RootRecord *new_root;
3932 guint32 hash = addr_hash % roots_hash_size [root_type];
3933 for (new_root = roots_hash [root_type][hash]; new_root; new_root = new_root->next) {
3934 /* we allow changing the size and the descriptor (for thread statics etc) */
3935 if (new_root->start_root == start) {
3944 * We do not coalesce roots.
/*
 * Register the memory range [START, START + SIZE) as a GC root of ROOT_TYPE,
 * described by DESCR.
 *
 * Each root table is first checked against a load factor of two entries per
 * bucket. All root types are then searched for an existing record starting
 * at START: if one exists its end and descriptor are updated in place (the
 * assertion forbids switching between a NULL and a non-NULL descriptor) and
 * roots_size is adjusted. Otherwise a new RootRecord is allocated and pushed
 * onto the head of its bucket in the ROOT_TYPE table.
 * NOTE(review): the action taken when the load factor is exceeded and the
 * return values are not visible in this excerpt.
 */
3947 mono_gc_register_root_inner (char *start, size_t size, void *descr, int root_type)
3949 RootRecord *new_root;
3950 unsigned int hash, addr_hash = mono_aligned_addr_hash (start);
3953 for (i = 0; i < ROOT_TYPE_NUM; ++i) {
3954 if (num_roots_entries [i] >= roots_hash_size [i] * 2)
3957 for (i = 0; i < ROOT_TYPE_NUM; ++i) {
3958 new_root = find_root (i, start, addr_hash);
3959 /* we allow changing the size and the descriptor (for thread statics etc) */
3961 size_t old_size = new_root->end_root - new_root->start_root;
3962 new_root->end_root = new_root->start_root + size;
3963 g_assert (((new_root->root_desc != 0) && (descr != NULL)) ||
3964 ((new_root->root_desc == 0) && (descr == NULL)));
3965 new_root->root_desc = (mword)descr;
3967 roots_size -= old_size;
3972 new_root = get_internal_mem (sizeof (RootRecord));
3974 new_root->start_root = start;
3975 new_root->end_root = new_root->start_root + size;
3976 new_root->root_desc = (mword)descr;
3978 hash = addr_hash % roots_hash_size [root_type];
3979 num_roots_entries [root_type]++;
3980 new_root->next = roots_hash [root_type] [hash];
3981 roots_hash [root_type][hash] = new_root;
3982 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * Register [START, START + SIZE) as a root. A non-NULL DESCR registers it as
 * ROOT_TYPE_NORMAL; a NULL DESCR registers it as ROOT_TYPE_PINNED.
 */
3992 mono_gc_register_root (char *start, size_t size, void *descr)
3994 return mono_gc_register_root_inner (start, size, descr, descr ? ROOT_TYPE_NORMAL : ROOT_TYPE_PINNED);
/*
 * Register [START, START + SIZE) as a root of type ROOT_TYPE_WBARRIER,
 * forwarding all arguments to mono_gc_register_root_inner ().
 */
3998 mono_gc_register_root_wbarrier (char *start, size_t size, void *descr)
4000 return mono_gc_register_root_inner (start, size, descr, ROOT_TYPE_WBARRIER);
/*
 * Remove the root whose range starts at ADDR. Every root type table is
 * probed at the bucket selected by the hash of ADDR; a matching record is
 * unlinked (from its predecessor or from the bucket head), its size is
 * subtracted from roots_size, the entry count for that type is decremented
 * and the record is freed.
 */
4004 mono_gc_deregister_root (char* addr)
4006 RootRecord *tmp, *prev;
4007 unsigned int hash, addr_hash = mono_aligned_addr_hash (addr);
4011 for (root_type = 0; root_type < ROOT_TYPE_NUM; ++root_type) {
4012 hash = addr_hash % roots_hash_size [root_type];
4013 tmp = roots_hash [root_type][hash];
4016 if (tmp->start_root == (char*)addr) {
4018 prev->next = tmp->next;
4020 roots_hash [root_type][hash] = tmp->next;
4021 roots_size -= (tmp->end_root - tmp->start_root);
4022 num_roots_entries [root_type]--;
4023 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
4024 free_internal_mem (tmp);
4035 * ######################################################################
4036 * ######## Thread handling (stop/start code)
4037 * ######################################################################
4040 /* eventually share with MonoThread? */
/*
 * Per-thread bookkeeping record, chained through `next` inside a bucket of
 * the thread hash table. The tlab_*_addr members hold the addresses of the
 * thread's TLAB variables so another thread can reset them, and `remset`
 * points at the thread's remembered set.
 * NOTE(review): some members are not visible in this excerpt; other code in
 * this file also uses stack_start, stack_end and signal.
 */
4041 typedef struct _SgenThreadInfo SgenThreadInfo;
4043 struct _SgenThreadInfo {
4044 SgenThreadInfo *next;
4045 ARCH_THREAD_TYPE id;
4046 unsigned int stop_count; /* to catch duplicate signals */
4051 char **tlab_next_addr;
4052 char **tlab_start_addr;
4053 char **tlab_temp_end_addr;
4054 char **tlab_real_end_addr;
4055 RememberedSet *remset;
4058 /* FIXME: handle large/small config */
/*
 * Registered threads live in a fixed-size chained hash table with
 * THREAD_HASH_SIZE buckets. HASH_PTHREAD_T mixes a thread id by dropping its
 * low four bits and multiplying by a constant; users reduce the result
 * modulo THREAD_HASH_SIZE to pick a bucket.
 */
4059 #define THREAD_HASH_SIZE 11
4060 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
4062 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
/*
 * State for the signal-based stop/start-the-world implementation:
 * a semaphore on which handlers acknowledge each signal, a counter of
 * stop requests, the two signal numbers used to suspend and restart
 * threads, the signal mask passed to sigsuspend () while a thread is parked,
 * and a buffer for the current thread's register values.
 */
4064 #if USE_SIGNAL_BASED_START_STOP_WORLD
4066 static sem_t suspend_ack_semaphore;
4067 static unsigned int global_stop_count = 0;
4068 static int suspend_signal_num = SIGPWR;
4069 static int restart_signal_num = SIGXCPU;
4070 static sigset_t suspend_signal_mask;
4071 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
4073 /* LOCKING: assumes the GC lock is held */
/*
 * Find the SgenThreadInfo registered for thread ID by walking the bucket
 * selected by HASH_PTHREAD_T (id) % THREAD_HASH_SIZE until an entry with an
 * equal id is found or the chain ends.
 * NOTE(review): the loop body and return are not visible here; callers such
 * as mono_gc_register_thread () treat a missing thread as a NULL result.
 */
4074 static SgenThreadInfo*
4075 thread_info_lookup (ARCH_THREAD_TYPE id)
4077 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4078 SgenThreadInfo *info;
4080 info = thread_table [hash];
4081 while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
/*
 * Refresh the calling thread's scan limits before a collection: its
 * stack_start is set to the (aligned) address of a local variable, and the
 * register contents are stored into cur_thread_regs through
 * ARCH_STORE_REGS. The START argument is not referenced by the lines visible
 * here.
 */
4088 update_current_thread_stack (void *start)
4090 void *ptr = cur_thread_regs;
4091 SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
4092 info->stack_start = align_pointer (&ptr);
4093 ARCH_STORE_REGS (ptr);
/*
 * Map SIGNUM to a short description for debug logging, distinguishing the
 * suspend signal from the restart signal.
 * NOTE(review): the returned strings are not visible in this excerpt.
 */
4097 signal_desc (int signum)
4099 if (signum == suspend_signal_num)
4101 if (signum == restart_signal_num)
4106 /* LOCKING: assumes the GC lock is held */
/*
 * Deliver SIGNUM to every registered thread other than the caller using
 * pthread_kill (), logging success or failure per thread, and then wait on
 * suspend_ack_semaphore once per counted thread. sem_wait () is retried when
 * it fails with EINTR; any other failure aborts through g_error ().
 * NOTE(review): the statements that maintain and return `count` are not
 * visible here; callers log the result as a number of threads.
 */
4108 thread_handshake (int signum)
4110 int count, i, result;
4111 SgenThreadInfo *info;
4112 pthread_t me = pthread_self ();
4115 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4116 for (info = thread_table [i]; info; info = info->next) {
4117 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
4118 if (ARCH_THREAD_EQUALS (info->id, me)) {
4119 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
4122 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
4124 result = pthread_kill (info->id, signum);
4126 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
4129 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
4135 for (i = 0; i < count; ++i) {
4136 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
4137 if (errno != EINTR) {
4138 g_error ("sem_wait ()");
4145 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
/*
 * Signal handler executed by a thread that is being suspended. It publishes
 * the thread's current remembered set and an approximate stack start (the
 * aligned address of a local) in its SgenThreadInfo, posts
 * suspend_ack_semaphore to acknowledge the stop, and then parks in
 * sigsuspend () until info->signal equals restart_signal_num. A second post
 * acknowledges the restart. The duplicate-signal test is compiled out by the
 * constant 0 in its condition. errno is captured on entry in old_errno;
 * NOTE(review): its restoration is not visible in this excerpt.
 */
4147 suspend_handler (int sig)
4149 SgenThreadInfo *info;
4152 int old_errno = errno;
4154 id = pthread_self ();
4155 info = thread_info_lookup (id);
4156 stop_count = global_stop_count;
4157 /* duplicate signal */
4158 if (0 && info->stop_count == stop_count) {
4162 /* update the remset info in the thread data structure */
4163 info->remset = remembered_set;
4165 * this includes the register values that the kernel put on the stack.
4166 * Write arch-specific code to only push integer regs and a more accurate
4169 info->stack_start = align_pointer (&id);
4171 /* notify the waiting thread */
4172 sem_post (&suspend_ack_semaphore);
4173 info->stop_count = stop_count;
4175 /* wait until we receive the restart signal */
4178 sigsuspend (&suspend_signal_mask);
4179 } while (info->signal != restart_signal_num);
4181 /* notify the waiting thread */
4182 sem_post (&suspend_ack_semaphore);
/*
 * Signal handler for the restart signal: it records restart_signal_num in
 * the calling thread's info->signal, which is the value the loop in
 * suspend_handler () waits for. errno is captured on entry in old_errno.
 */
4188 restart_handler (int sig)
4190 SgenThreadInfo *info;
4191 int old_errno = errno;
4193 info = thread_info_lookup (pthread_self ());
4194 info->signal = restart_signal_num;
/*
 * Pause accounting: the timestamp taken when the world is stopped and the
 * longest pause, in microseconds, observed when it is restarted.
 */
4199 static TV_DECLARE (stop_world_time);
4200 static unsigned long max_pause_usec = 0;
4202 /* LOCKING: assumes the GC lock is held */
/*
 * Body of the stop-the-world routine (its signature line is not visible in
 * this excerpt): bump global_stop_count, record the start of the pause in
 * stop_world_time, and suspend all other threads by handshaking with the
 * suspend signal.
 */
4208 global_stop_count++;
4209 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
4210 TV_GETTIME (stop_world_time);
4211 count = thread_handshake (suspend_signal_num);
4212 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
4216 /* LOCKING: assumes the GC lock is held */
/*
 * Resume all threads by handshaking with the restart signal, then compute
 * the elapsed pause since stop_world_time and fold it into max_pause_usec.
 */
4218 restart_world (void)
4221 TV_DECLARE (end_sw);
4224 count = thread_handshake (restart_signal_num);
4225 TV_GETTIME (end_sw);
4226 usec = TV_ELAPSED (stop_world_time, end_sw);
4227 max_pause_usec = MAX (usec, max_pause_usec);
4228 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
4232 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
4235 * Identify objects pinned in a thread stack and its registers.
/*
 * Conservatively pin nursery objects referenced from thread stacks and from
 * the saved registers of the current thread. Each registered thread's
 * [stack_start, stack_end) range is scanned, and cur_thread_regs is scanned
 * as well.
 * NOTE(review): the condition that classifies a thread as dead and skips it
 * is not visible in this excerpt.
 */
4238 pin_thread_data (void *start_nursery, void *end_nursery)
4241 SgenThreadInfo *info;
4243 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4244 for (info = thread_table [i]; info; info = info->next) {
4246 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
4249 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd, pinned=%d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, next_pin_slot));
4250 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
4253 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers, pinned=%d\n", next_pin_slot));
4254 conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery);
/*
 * Debugging aid: scan every registered thread's stack word by word and log
 * each slot whose value points inside [OBJ, OBJ + SIZE), i.e. each stack
 * location that would conservatively pin the object. Registers are not
 * checked yet (see the FIXME).
 */
4258 find_pinning_ref_from_thread (char *obj, size_t size)
4261 SgenThreadInfo *info;
4262 char *endobj = obj + size;
4264 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4265 for (info = thread_table [i]; info; info = info->next) {
4266 char **start = (char**)info->stack_start;
4269 while (start < (char**)info->stack_end) {
4270 if (*start >= obj && *start < endobj) {
4271 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end));
4277 /* FIXME: check register */
4280 /* return TRUE if ptr points inside the managed heap */
/*
 * The only test applied is a range check of PTR against
 * [lowest_heap_address, highest_heap_address); addresses outside that range
 * are rejected.
 */
4282 ptr_in_heap (void* ptr)
4284 mword p = (mword)ptr;
4285 if (p < lowest_heap_address || p >= highest_heap_address)
4287 /* FIXME: more checks */
/*
 * Process the remembered-set entry at P during a nursery collection and
 * return a pointer to the entry that follows it (callers use the result to
 * advance through the set). The entry kind is taken from the low bits of *P
 * (REMSET_TYPE_MASK):
 *  - a single location: if the slot lies outside the nursery and inside the
 *    heap, its referent is passed through copy_object () and the slot is
 *    updated;
 *  - a counted range of slots: each slot is updated the same way;
 *  - a whole object: handed to scan_object ();
 *  - REMSET_OTHER: sub-kinds visible here are a value type scanned with
 *    scan_vtype () and REMSET_ROOT_LOCATION, which is treated like a single
 *    location but without the in-heap requirement.
 * When GLOBAL is FALSE and an updated slot still points into the nursery
 * (the target stayed in place because it is pinned), the slot is added to
 * the global remembered set.
 * NOTE(review): several case labels, the `count`/`desc` loads and the return
 * statements are not visible in this excerpt.
 */
4292 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
4298 /* FIXME: exclude stack locations */
4299 switch ((*p) & REMSET_TYPE_MASK) {
4300 case REMSET_LOCATION:
4302 //__builtin_prefetch (ptr);
4303 if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) {
4304 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4305 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
4306 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4308 * If the object is pinned, each reference to it from nonpinned objects
4309 * becomes part of the global remset, which can grow very large.
4311 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4312 add_to_global_remset (ptr, FALSE);
4315 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
4319 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4320 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4323 while (count-- > 0) {
4324 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4325 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
4326 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
4327 add_to_global_remset (ptr, FALSE);
4332 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4333 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4335 scan_object (*ptr, start_nursery, end_nursery);
4337 case REMSET_OTHER: {
4338 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4342 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4345 scan_vtype ((char*)ptr, desc, start_nursery, end_nursery);
4347 case REMSET_ROOT_LOCATION:
4348 /* Same as REMSET_LOCATION, but the address is not required to be in the heap */
4349 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4350 DEBUG (9, fprintf (gc_debug_file, "Overwrote root location remset at %p with %p\n", ptr, *ptr));
4351 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4353 * If the object is pinned, each reference to it from nonpinned objects
4354 * becomes part of the global remset, which can grow very large.
4356 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4357 add_to_global_remset (ptr, TRUE);
4361 g_assert_not_reached ();
4366 g_assert_not_reached ();
/*
 * Process every remembered set for a nursery collection.
 *
 * Global sets: each entry is handled with handle_remset (), then the set is
 * compacted in place - an entry is kept (copied down to store_pos) only when
 * the slot it names still holds a nursery pointer - and store_next is
 * truncated to the compacted end. Entries in the global sets are expected to
 * be either plain locations or REMSET_OTHER / REMSET_ROOT_LOCATION pairs
 * (asserted).
 *
 * Per-thread sets: each entry is handled, the set is emptied, and every
 * buffer in the chain other than the one info->remset points at is freed.
 */
4372 scan_from_remsets (void *start_nursery, void *end_nursery)
4375 SgenThreadInfo *info;
4376 RememberedSet *remset, *next;
4377 mword *p, *next_p, *store_pos;
4379 /* the global one */
4380 for (remset = global_remset; remset; remset = remset->next) {
4381 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4382 store_pos = remset->data;
4383 for (p = remset->data; p < remset->store_next; p = next_p) {
4386 next_p = handle_remset (p, start_nursery, end_nursery, TRUE);
4389 * Clear global remsets of locations which no longer point to the
4390 * nursery. Otherwise, they could grow indefinitely between major
4393 ptr = (p [0] & ~REMSET_TYPE_MASK);
4394 if ((p [0] & REMSET_TYPE_MASK) == REMSET_LOCATION) {
4395 if (ptr_in_nursery (*(void**)ptr))
4396 *store_pos ++ = p [0];
4398 g_assert ((p [0] & REMSET_TYPE_MASK) == REMSET_OTHER);
4399 g_assert (p [1] == REMSET_ROOT_LOCATION);
4400 if (ptr_in_nursery (*(void**)ptr)) {
4401 *store_pos ++ = p [0];
4402 *store_pos ++ = p [1];
4407 /* Truncate the remset */
4408 remset->store_next = store_pos;
4411 /* the per-thread ones */
4412 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4413 for (info = thread_table [i]; info; info = info->next) {
4414 for (remset = info->remset; remset; remset = next) {
4415 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4416 for (p = remset->data; p < remset->store_next;) {
4417 p = handle_remset (p, start_nursery, end_nursery, FALSE);
4419 remset->store_next = remset->data;
4420 next = remset->next;
4421 remset->next = NULL;
4422 if (remset != info->remset) {
4423 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4424 free_internal_mem (remset);
4432 * Clear the info in the remembered sets: we're doing a major collection, so
4433 * the per-thread ones are not needed and the global ones will be reconstructed
/*
 * Discard the contents of all remembered sets without processing them.
 * For the global chain and for each thread's chain, every buffer is emptied
 * (store_next reset to data) and unlinked; buffers other than the chain head
 * (global_remset, or the thread's info->remset) are freed, so exactly one
 * empty buffer remains per chain.
 */
4437 clear_remsets (void)
4440 SgenThreadInfo *info;
4441 RememberedSet *remset, *next;
4443 /* the global list */
4444 for (remset = global_remset; remset; remset = next) {
4445 remset->store_next = remset->data;
4446 next = remset->next;
4447 remset->next = NULL;
4448 if (remset != global_remset) {
4449 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4450 free_internal_mem (remset);
4453 /* the per-thread ones */
4454 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4455 for (info = thread_table [i]; info; info = info->next) {
4456 for (remset = info->remset; remset; remset = next) {
4457 remset->store_next = remset->data;
4458 next = remset->next;
4459 remset->next = NULL;
4460 if (remset != info->remset) {
4461 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4462 free_internal_mem (remset);
4470 * Clear the thread local TLAB variables for all threads.
/*
 * Body of the routine that resets TLAB state (its signature line is not
 * visible in this excerpt): for every registered thread, the four TLAB
 * variables reachable through the tlab_*_addr pointers are set to NULL.
 */
4475 SgenThreadInfo *info;
4478 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4479 for (info = thread_table [i]; info; info = info->next) {
4480 /* A new TLAB will be allocated when the thread does its first allocation */
4481 *info->tlab_start_addr = NULL;
4482 *info->tlab_next_addr = NULL;
4483 *info->tlab_temp_end_addr = NULL;
4484 *info->tlab_real_end_addr = NULL;
4490 * Find the tlab_next value of the TLAB which contains ADDR.
/*
 * Search all registered threads for one whose TLAB's used part
 * [tlab_start, tlab_next) contains ADDR and return that thread's tlab_next.
 * NOTE(review): the result when no TLAB matches is not visible here.
 */
4493 find_tlab_next_from_address (char *addr)
4495 SgenThreadInfo *info;
4498 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4499 for (info = thread_table [i]; info; info = info->next) {
4500 if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr)
4501 return *info->tlab_next_addr;
4508 /* LOCKING: assumes the GC lock is held */
/*
 * Create and register the SgenThreadInfo for the calling thread.
 *
 * The record is malloc'ed, filled with the thread id and the addresses of
 * the thread's TLAB variables, and given a stack_end computed in one of
 * three ways depending on what the platform provides:
 *   - pthread_getattr_np () + pthread_attr_getstack (): base + size;
 *   - pthread_get_stackaddr_np ();
 *   - otherwise ADDR rounded up to the next 4096-byte boundary (assumes a
 *     downward-growing stack, see the FIXME).
 * The record is then pushed onto its thread_table bucket, and a fresh
 * remembered set of DEFAULT_REMSET_SIZE entries is allocated, stored in both
 * info->remset and the thread's remembered_set, and recorded with
 * pthread_setspecific ().
 * NOTE(review): handling of a failed malloc () and the return statement are
 * not visible in this excerpt.
 */
4509 static SgenThreadInfo*
4510 gc_register_current_thread (void *addr)
4513 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
4516 info->id = ARCH_GET_THREAD ();
4517 info->stop_count = -1;
4520 info->stack_start = NULL;
4521 info->tlab_start_addr = &tlab_start;
4522 info->tlab_next_addr = &tlab_next;
4523 info->tlab_temp_end_addr = &tlab_temp_end;
4524 info->tlab_real_end_addr = &tlab_real_end;
4526 tlab_next_addr = &tlab_next;
4528 /* try to get it with attributes first */
4529 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
4533 pthread_attr_t attr;
4534 pthread_getattr_np (pthread_self (), &attr);
4535 pthread_attr_getstack (&attr, &sstart, &size);
4536 info->stack_end = (char*)sstart + size;
4537 pthread_attr_destroy (&attr);
4539 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
4540 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
4543 /* FIXME: we assume the stack grows down */
4544 gsize stack_bottom = (gsize)addr;
4545 stack_bottom += 4095;
4546 stack_bottom &= ~4095;
4547 info->stack_end = (char*)stack_bottom;
4551 /* hash into the table */
4552 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
4553 info->next = thread_table [hash];
4554 thread_table [hash] = info;
4556 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
4557 pthread_setspecific (remembered_set_key, remembered_set);
4558 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
/*
 * Remove the calling thread's record from thread_table: locate it in the
 * bucket for its id, unlink it (from the bucket head or from its
 * predecessor), and free the buffers of its remembered-set chain. Pending
 * remembered-set entries are dropped, not transferred (see the FIXME).
 * NOTE(review): the debug line dereferences the bucket head before the
 * search loop; whether a NULL head is guarded against is not visible here.
 */
4563 unregister_current_thread (void)
4566 SgenThreadInfo *prev = NULL;
4568 RememberedSet *rset;
4569 ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
4571 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4572 p = thread_table [hash];
4574 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
4575 while (!ARCH_THREAD_EQUALS (p->id, id)) {
4580 thread_table [hash] = p->next;
4582 prev->next = p->next;
4585 /* FIXME: transfer remsets if any */
4587 RememberedSet *next = rset->next;
4588 free_internal_mem (rset);
/*
 * Callback-shaped wrapper that unregisters the calling thread; the K
 * argument is not used by the visible code.
 * NOTE(review): presumably installed as the destructor of a pthread key -
 * confirm at the registration site.
 */
4595 unregister_thread (void *k)
4598 unregister_current_thread ();
/*
 * Make sure the calling thread is known to the collector: look it up and
 * register it, using BASEPTR as the stack address hint, when it is missing.
 * Returns non-zero when a thread record exists afterwards.
 */
4603 mono_gc_register_thread (void *baseptr)
4605 SgenThreadInfo *info;
4607 info = thread_info_lookup (ARCH_GET_THREAD ());
4609 info = gc_register_current_thread (baseptr);
4611 return info != NULL;
/*
 * pthread interception support. The pthread_create/pthread_detach macros are
 * undefined so the real functions can be called below. SgenThreadStartInfo
 * carries the user's start routine to the trampoline gc_start_thread ().
 * NOTE(review): only the start_routine member is visible here; other code
 * also uses `arg` and a `registered` semaphore.
 */
4614 #if USE_PTHREAD_INTERCEPT
4616 #undef pthread_create
4618 #undef pthread_detach
4621 void *(*start_routine) (void *);
4625 } SgenThreadStartInfo;
/*
 * Thread entry trampoline used by mono_gc_pthread_create (). It copies the
 * user's routine and argument out of ARG, registers the new thread with the
 * collector (using the address of a local as the stack hint), posts
 * start_info->registered so the creating thread can continue, and then runs
 * the user's routine.
 * NOTE(review): the final unregister_current_thread () line follows a
 * comment saying the pthread key destructor performs that step; whether the
 * call is live or commented out cannot be determined from this excerpt.
 */
4628 gc_start_thread (void *arg)
4630 SgenThreadStartInfo *start_info = arg;
4631 SgenThreadInfo* info;
4632 void *t_arg = start_info->arg;
4633 void *(*start_func) (void*) = start_info->start_routine;
4637 info = gc_register_current_thread (&result);
4639 sem_post (&(start_info->registered));
4640 result = start_func (t_arg);
4642 * this is done by the pthread key dtor
4644 unregister_current_thread ();
/*
 * pthread_create () replacement that makes new threads register with the
 * collector. A SgenThreadStartInfo holding ARG and START_ROUTINE is
 * allocated and the thread is created with gc_start_thread () as its entry
 * point. The creator then waits on the `registered` semaphore (retrying
 * while sem_wait () fails), which the new thread posts after registering,
 * and finally destroys the semaphore.
 * NOTE(review): the allocation-failure check, the test of pthread_create's
 * result and the release of start_info are not visible in this excerpt.
 */
4652 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
4654 SgenThreadStartInfo *start_info;
4657 start_info = malloc (sizeof (SgenThreadStartInfo));
4660 sem_init (&(start_info->registered), 0, 0);
4661 start_info->arg = arg;
4662 start_info->start_routine = start_routine;
4664 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
4666 while (sem_wait (&(start_info->registered)) != 0) {
4667 /*if (EINTR != errno) ABORT("sem_wait failed"); */
4670 sem_destroy (&(start_info->registered));
/* Plain pass-through to pthread_join (); no GC bookkeeping is done here. */
4676 mono_gc_pthread_join (pthread_t thread, void **retval)
4678 return pthread_join (thread, retval);
/* Plain pass-through to pthread_detach (); no GC bookkeeping is done here. */
4682 mono_gc_pthread_detach (pthread_t thread)
4684 return pthread_detach (thread);
4687 #endif /* USE_PTHREAD_INTERCEPT */
4690 * ######################################################################
4691 * ######## Write barriers
4692 * ######################################################################
/*
 * Allocate an empty remembered-set buffer with room for SIZE pointer-sized
 * entries. The header and the entry array come from one get_internal_mem ()
 * allocation; store_next starts at data and end_set marks data + SIZE.
 * ID is used only in the debug message.
 */
4695 static RememberedSet*
4696 alloc_remset (int size, gpointer id) {
4697 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
4698 res->store_next = res->data;
4699 res->end_set = res->data + size;
4701 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
4706 * Note: the write barriers first do the needed GC work and then do the actual store:
4707 * this way the value is visible to the conservative GC scan after the write barrier
4708 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
4709 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
4710 * happens when we need to record which pointers contain references to the new generation.
4711 * The write barrier will be executed, but the pointer is still not stored.
/*
 * Write barrier for storing VALUE into the field at FIELD_PTR of OBJ.
 * A field that itself lives in the nursery is written directly. Otherwise
 * the field address is appended to the thread's remembered set before the
 * store; when the current buffer is full, a new buffer of the same capacity
 * is allocated, linked in front of the old one, and published both in
 * remembered_set and in the thread's SgenThreadInfo.
 */
4714 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
4717 if (ptr_in_nursery (field_ptr)) {
4718 *(void**)field_ptr = value;
4721 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
4722 rs = remembered_set;
4723 if (rs->store_next < rs->end_set) {
4724 *(rs->store_next++) = (mword)field_ptr;
4725 *(void**)field_ptr = value;
4728 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4729 rs->next = remembered_set;
4730 remembered_set = rs;
4731 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4732 *(rs->store_next++) = (mword)field_ptr;
4733 *(void**)field_ptr = value;
/*
 * Write barrier for storing VALUE into the array element at SLOT_PTR of ARR.
 * A slot inside the nursery is written directly; otherwise the slot address
 * is recorded in the thread's remembered set (growing the chain with a new
 * buffer when the current one is full) and then the store is performed.
 */
4737 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
4739 RememberedSet *rs = remembered_set;
4740 if (ptr_in_nursery (slot_ptr)) {
4741 *(void**)slot_ptr = value;
4744 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
4745 if (rs->store_next < rs->end_set) {
4746 *(rs->store_next++) = (mword)slot_ptr;
4747 *(void**)slot_ptr = value;
4750 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4751 rs->next = remembered_set;
4752 remembered_set = rs;
4753 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4754 *(rs->store_next++) = (mword)slot_ptr;
4755 *(void**)slot_ptr = value;
/*
 * Write barrier for a bulk update of COUNT consecutive reference slots
 * starting at SLOT_PTR in ARR. No stores are performed here; the function
 * only records a two-word REMSET_RANGE entry (tagged address, then count).
 * The capacity test requires room for both words, and a new buffer is
 * chained in when they do not fit. Slots inside the nursery need no entry.
 */
4759 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
4761 RememberedSet *rs = remembered_set;
4762 if (ptr_in_nursery (slot_ptr))
4764 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
4765 if (rs->store_next + 1 < rs->end_set) {
4766 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4767 *(rs->store_next++) = count;
4770 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4771 rs->next = remembered_set;
4772 remembered_set = rs;
4773 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4774 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4775 *(rs->store_next++) = count;
/*
 * Generic write barrier for storing VALUE at an arbitrary location PTR.
 * A location inside the nursery is written directly; otherwise PTR is
 * recorded in the thread's remembered set (chaining a new buffer when the
 * current one is full) and then the store is performed. PTR is not yet
 * checked to be inside the heap (see the FIXME).
 */
4779 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4782 if (ptr_in_nursery (ptr)) {
4783 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4784 *(void**)ptr = value;
4787 rs = remembered_set;
4788 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
4789 /* FIXME: ensure it is on the heap */
4790 if (rs->store_next < rs->end_set) {
4791 *(rs->store_next++) = (mword)ptr;
4792 *(void**)ptr = value;
4795 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4796 rs->next = remembered_set;
4797 remembered_set = rs;
4798 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4799 *(rs->store_next++) = (mword)ptr;
4800 *(void**)ptr = value;
/*
 * Write barrier for storing VALUE into the root location PTR. A two-word
 * entry (address tagged REMSET_OTHER, then REMSET_ROOT_LOCATION) is appended
 * to the thread's remembered set, chaining a new buffer when needed, and
 * the store is then performed.
 * NOTE(review): the statement guarded by the ptr_in_nursery () test is not
 * visible in this excerpt. If it is a bare return, VALUE is never written
 * on that path, unlike the set_field/generic_store barriers which store
 * before returning - confirm.
 * NOTE(review): the capacity test asks for one more free word than the two
 * that are written; this is conservative, not unsafe.
 */
4804 mono_gc_wbarrier_set_root (gpointer ptr, MonoObject *value)
4806 RememberedSet *rs = remembered_set;
4807 if (ptr_in_nursery (ptr))
4809 DEBUG (8, fprintf (gc_debug_file, "Adding root remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
4811 if (rs->store_next + 2 < rs->end_set) {
4812 *(rs->store_next++) = (mword)ptr | REMSET_OTHER;
4813 *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION;
4814 *(void**)ptr = value;
4817 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4818 rs->next = remembered_set;
4819 remembered_set = rs;
4820 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4821 *(rs->store_next++) = (mword)ptr | REMSET_OTHER;
4822 *(rs->store_next++) = (mword)REMSET_ROOT_LOCATION;
4824 *(void**)ptr = value;
/*
 * Write barrier for copying value-type data of class KLASS into DEST.
 * No copy is performed by the visible code; it records a three-word entry
 * (DEST tagged REMSET_OTHER, REMSET_VTYPE, and the class's GC descriptor),
 * chaining a new buffer when the three words do not fit. A DEST inside the
 * nursery needs no entry. SRC and COUNT are not recorded in the entry.
 */
4828 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
4830 RememberedSet *rs = remembered_set;
4831 if (ptr_in_nursery (dest))
4833 DEBUG (8, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
4835 if (rs->store_next + 2 < rs->end_set) {
4836 *(rs->store_next++) = (mword)dest | REMSET_OTHER;
4837 *(rs->store_next++) = (mword)REMSET_VTYPE;
4838 *(rs->store_next++) = (mword)klass->gc_descr;
4841 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4842 rs->next = remembered_set;
4843 remembered_set = rs;
4844 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4845 *(rs->store_next++) = (mword)dest | REMSET_OTHER;
4846 *(rs->store_next++) = (mword)REMSET_VTYPE;
4847 *(rs->store_next++) = (mword)klass->gc_descr;
4851 * mono_gc_wbarrier_object:
4853 * Write barrier to call when obj is the result of a clone or copy of an object.
/*
 * Record the whole object OBJ in the thread's remembered set with a single
 * REMSET_OBJECT-tagged entry, chaining a new buffer when the current one is
 * full. Unlike the other barriers, no nursery test is applied to OBJ by the
 * visible code.
 */
4856 mono_gc_wbarrier_object (MonoObject* obj)
4858 RememberedSet *rs = remembered_set;
4859 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4860 if (rs->store_next < rs->end_set) {
4861 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4864 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4865 rs->next = remembered_set;
4866 remembered_set = rs;
4867 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4868 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4872 * ######################################################################
4873 * ######## Collector debugging
4874 * ######################################################################
/*
 * Printable names for GC descriptor types, indexed by the descriptor type
 * number (see the lookup in describe_ptr ()).
 * NOTE(review): the initializer entries are not visible in this excerpt.
 */
4877 const char*descriptor_types [] = {
4889 describe_ptr (char *ptr)
4891 GCMemSection *section;
4896 if (ptr_in_nursery (ptr)) {
4897 printf ("Pointer inside nursery.\n");
4899 for (section = section_list; section;) {
4900 if (ptr >= section->data && ptr < section->data + section->size)
4902 section = section->next;
4906 printf ("Pointer inside oldspace.\n");
4907 } else if (obj_is_from_pinned_alloc (ptr)) {
4908 printf ("Pointer is inside a pinned chunk.\n");
4910 printf ("Pointer unknown.\n");
4915 if (object_is_pinned (ptr))
4916 printf ("Object is pinned.\n");
4918 if (object_is_forwarded (ptr))
4919 printf ("Object is forwared.\n");
4921 // FIXME: Handle pointers to the inside of objects
4922 vtable = (MonoVTable*)LOAD_VTABLE (ptr);
4924 printf ("VTable: %p\n", vtable);
4925 if (vtable == NULL) {
4926 printf ("VTable is invalid (empty).\n");
4929 if (ptr_in_nursery (vtable)) {
4930 printf ("VTable is invalid (points inside nursery).\n");
4933 printf ("Class: %s\n", vtable->klass->name);
4935 desc = ((GCVTable*)vtable)->desc;
4936 printf ("Descriptor: %lx\n", desc);
4939 printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]);
4943 find_in_remset_loc (mword *p, char *addr, gboolean *found)
4949 switch ((*p) & REMSET_TYPE_MASK) {
4950 case REMSET_LOCATION:
4951 if (*p == (mword)addr)
4955 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4957 if ((void**)addr >= ptr && (void**)addr < ptr + count)
4961 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4962 count = safe_object_get_size ((MonoObject*)ptr);
4963 count += (ALLOC_ALIGN - 1);
4964 count &= (ALLOC_ALIGN - 1);
4965 count /= sizeof (mword);
4966 if ((void**)addr >= ptr && (void**)addr < ptr + count)
4969 case REMSET_OTHER: {
4972 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4975 switch (desc & 0x7) {
4976 case DESC_TYPE_RUN_LENGTH:
4977 OBJ_RUN_LEN_SIZE (skip_size, desc, ptr);
4978 /* The descriptor includes the size of MonoObject */
4979 skip_size -= sizeof (MonoObject);
4980 if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer)))
4985 g_assert_not_reached ();
4989 case REMSET_ROOT_LOCATION:
4992 g_assert_not_reached ();
4997 g_assert_not_reached ();
5003 * Return whether ADDR occurs in the remembered sets
/*
 * Debugging helper: walk every entry of the global remembered sets and then
 * of each thread's remembered sets, decoding entries with
 * find_in_remset_loc (), to determine whether some entry covers ADDR.
 * NOTE(review): the early exits and the return statement are not visible in
 * this excerpt.
 */
5006 find_in_remsets (char *addr)
5009 SgenThreadInfo *info;
5010 RememberedSet *remset;
5012 gboolean found = FALSE;
5014 /* the global one */
5015 for (remset = global_remset; remset; remset = remset->next) {
5016 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
5017 for (p = remset->data; p < remset->store_next;) {
5018 p = find_in_remset_loc (p, addr, &found);
5023 /* the per-thread ones */
5024 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
5025 for (info = thread_table [i]; info; info = info->next) {
5026 for (remset = info->remset; remset; remset = remset->next) {
5027 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
5028 for (p = remset->data; p < remset->store_next;) {
5029 p = find_in_remset_loc (p, addr, &found);
/*
 * Per-reference action used by check_remsets_for_area (): when the slot PTR
 * inside OBJ holds a non-NULL pointer into the used part of the nursery
 * [nursery_start, nursery_next), the slot address must be covered by some
 * remembered set; otherwise the offending reference is logged and the check
 * aborts.
 */
5041 #define HANDLE_PTR(ptr,obj) do { \
5042 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
5043 if (!find_in_remsets ((char*)(ptr))) { \
5044 fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
5045 g_assert_not_reached (); \
5051 * Check that each object reference inside the area which points into the nursery
5052 * can be found in the remembered sets.
/*
 * Walk the objects laid out back to back in [START, END) and apply the
 * current HANDLE_PTR action to every reference slot of each object.
 * Zero words are skipped one pointer at a time. For every object the GC
 * descriptor type selects both how its size (skip_size) is computed and
 * which *_FOREACH_PTR macro enumerates its reference slots; array sizes are
 * rounded up to ALLOC_ALIGN and, for arrays with bounds, extended by the
 * bounds block.
 * NOTE(review): the loads of `desc`/`type`, the per-type counters and the
 * statements advancing START are not visible in this excerpt.
 */
5054 static void __attribute__((noinline))
5055 check_remsets_for_area (char *start, char *end)
5060 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
5062 new_obj_references = 0;
5063 obj_references_checked = 0;
5064 while (start < end) {
5065 if (!*(void**)start) {
5066 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
5069 vt = (GCVTable*)LOAD_VTABLE (start);
5070 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
5072 MonoObject *obj = (MonoObject*)start;
5073 g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
5077 if (type == DESC_TYPE_STRING) {
5078 STRING_SIZE (skip_size, start);
5082 } else if (type == DESC_TYPE_RUN_LENGTH) {
5083 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
5084 g_assert (skip_size);
5085 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
5089 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
5090 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
5091 skip_size *= mono_array_length ((MonoArray*)start);
5092 skip_size += sizeof (MonoArray);
5093 skip_size += (ALLOC_ALIGN - 1);
5094 skip_size &= ~(ALLOC_ALIGN - 1);
5095 OBJ_VECTOR_FOREACH_PTR (vt, start);
5096 if (((MonoArray*)start)->bounds) {
5097 /* account for the bounds */
5098 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
5103 } else if (type == DESC_TYPE_SMALL_BITMAP) {
5104 OBJ_BITMAP_SIZE (skip_size, desc, start);
5105 g_assert (skip_size);
5106 OBJ_BITMAP_FOREACH_PTR (desc,start);
5110 } else if (type == DESC_TYPE_LARGE_BITMAP) {
5111 skip_size = safe_object_get_size ((MonoObject*)start);
5112 skip_size += (ALLOC_ALIGN - 1);
5113 skip_size &= ~(ALLOC_ALIGN - 1);
5114 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
5118 } else if (type == DESC_TYPE_COMPLEX) {
5119 /* this is a complex object */
5120 skip_size = safe_object_get_size ((MonoObject*)start);
5121 skip_size += (ALLOC_ALIGN - 1);
5122 skip_size &= ~(ALLOC_ALIGN - 1);
5123 OBJ_COMPLEX_FOREACH_PTR (vt, start);
5127 } else if (type == DESC_TYPE_COMPLEX_ARR) {
5128 /* this is an array of complex structs */
5129 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
5130 skip_size *= mono_array_length ((MonoArray*)start);
5131 skip_size += sizeof (MonoArray);
5132 skip_size += (ALLOC_ALIGN - 1);
5133 skip_size &= ~(ALLOC_ALIGN - 1);
5134 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
5135 if (((MonoArray*)start)->bounds) {
5136 /* account for the bounds */
5137 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
5149 * Perform consistency check of the heap.
5151 * Assumes the world is stopped.
/*
 * Heap consistency check entry point. Every memory section is visited and
 * its used range [data, next_data) is handed to check_remsets_for_area (),
 * which verifies that old-space references into the nursery are recorded in
 * a remembered set. Sections with role MEMORY_ROLE_GEN0 are tested for
 * separately; NOTE(review): the statement taken for them is not visible in
 * this excerpt (presumably they are skipped).
 */
5154 check_consistency (void)
5156 GCMemSection *section;
5158 // Need to add more checks
5159 // FIXME: Create a general heap enumeration function and use that
5161 DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
5163 // Check that oldspace->newspace pointers are registered with the collector
5164 for (section = section_list; section; section = section->next) {
5165 if (section->role == MEMORY_ROLE_GEN0)
5167 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
5168 check_remsets_for_area (section->data, section->next_data);
5171 DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
5174 /* Check that the reference is valid */
/*
 * HANDLE_PTR (ptr, obj): asserts that safe_name () yields a non-NULL name for
 * the object referenced by *ptr.  `obj` is not used by the lines visible here.
 * NOTE(review): the end of the macro (closing of the do-block) is not visible
 * in this extract, and neither is any guard around the assert.  Presumably
 * this is the hook expanded by the OBJ_*_FOREACH_PTR macros that
 * check_object () invokes -- confirm.
 */
5176 #define HANDLE_PTR(ptr,obj) do { \
5178 g_assert (safe_name (*(ptr)) != NULL); \
5185 * Perform consistency check on an object. Currently we only check that the
5186 * reference fields are valid.
/*
 * check_object: run the reference checks over the object at `start` and
 * return the address just past it (start + skip_size).
 * The low three bits of the descriptor (desc & 0x7) select the layout.  For
 * every layout except DESC_TYPE_STRING the matching OBJ_*_FOREACH_PTR macro is
 * run over the object before its size is computed; strings only have their
 * size computed through STRING_SIZE ().
 * Falling out of the listed cases reaches g_assert_not_reached ().
 * NOTE(review): the assignment that initialises `desc` is not visible in this
 * extract; presumably it is read from the vtable loaded below -- confirm.
 */
5189 check_object (char *start)
5198 vt = (GCVTable*)LOAD_VTABLE (start);
5199 //type = vt->desc & 0x7;
5202 switch (desc & 0x7) {
5203 case DESC_TYPE_STRING:
5204 STRING_SIZE (skip_size, start);
5205 return start + skip_size;
5206 case DESC_TYPE_RUN_LENGTH:
5207 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
5208 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
/* a zero size would make the caller's walk stop advancing */
5209 g_assert (skip_size);
5210 return start + skip_size;
/* arrays and vectors share one code path */
5211 case DESC_TYPE_ARRAY:
5212 case DESC_TYPE_VECTOR:
5213 OBJ_VECTOR_FOREACH_PTR (vt, start);
5214 skip_size = safe_object_get_size ((MonoObject*)start);
/* round the size up to a multiple of ALLOC_ALIGN (idiom assumes a power of two) */
5215 skip_size += (ALLOC_ALIGN - 1);
5216 skip_size &= ~(ALLOC_ALIGN - 1);
5217 return start + skip_size;
5218 case DESC_TYPE_SMALL_BITMAP:
5219 OBJ_BITMAP_FOREACH_PTR (desc,start);
5220 OBJ_BITMAP_SIZE (skip_size, desc, start);
5221 return start + skip_size;
5222 case DESC_TYPE_LARGE_BITMAP:
5223 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
5224 skip_size = safe_object_get_size ((MonoObject*)start);
5225 skip_size += (ALLOC_ALIGN - 1);
5226 skip_size &= ~(ALLOC_ALIGN - 1);
5227 return start + skip_size;
5228 case DESC_TYPE_COMPLEX:
5229 OBJ_COMPLEX_FOREACH_PTR (vt, start);
5230 /* this is a complex object */
5231 skip_size = safe_object_get_size ((MonoObject*)start);
5232 skip_size += (ALLOC_ALIGN - 1);
5233 skip_size &= ~(ALLOC_ALIGN - 1);
5234 return start + skip_size;
5235 case DESC_TYPE_COMPLEX_ARR:
5236 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
5237 /* this is an array of complex structs */
5238 skip_size = safe_object_get_size ((MonoObject*)start);
5239 skip_size += (ALLOC_ALIGN - 1);
5240 skip_size &= ~(ALLOC_ALIGN - 1);
5241 return start + skip_size;
5243 g_assert_not_reached ();
5248 * ######################################################################
5249 * ######## Other mono public interface functions.
5250 * ######################################################################
/*
 * mono_gc_collect: run a collection on request.
 * @generation: 0 selects collect_nursery (0); major_collection () is the
 * other visible call.
 * The address of the `generation` argument is handed to
 * update_current_thread_stack () before collecting.
 * NOTE(review): the line joining the two collection calls (presumably an
 * `else`) and any locking are not visible in this extract -- confirm.
 */
5254 mono_gc_collect (int generation)
5257 update_current_thread_stack (&generation);
5259 if (generation == 0) {
5260 collect_nursery (0);
5262 major_collection ();
/*
 * mono_gc_max_generation: takes no arguments.
 * NOTE(review): the body is not visible in this extract; presumably it reports
 * the highest generation index accepted by mono_gc_collect () -- confirm.
 */
5269 mono_gc_max_generation (void)
/*
 * mono_gc_collection_count: report a collection counter.
 * @generation: 0 yields num_minor_gcs; every other value yields num_major_gcs.
 */
5275 mono_gc_collection_count (int generation)
5277 if (generation == 0)
5278 return num_minor_gcs;
5279 return num_major_gcs;
/*
 * mono_gc_get_used_size: accumulate an estimate of the memory in use into
 * `tot`: los_memory_usage plus, for every section on section_list, the span
 * between section->data and section->next_data.  Pinned objects are not
 * accounted for (see the FIXME below).
 * NOTE(review): the declaration of `tot`, any locking and the return statement
 * are not visible in this extract; presumably `tot` is the value returned --
 * confirm.
 */
5283 mono_gc_get_used_size (void)
5286 GCMemSection *section;
5288 tot = los_memory_usage;
5289 for (section = section_list; section; section = section->next) {
5290 /* this is approximate... */
5291 tot += section->next_data - section->data;
5293 /* FIXME: account for pinned objects */
/*
 * mono_gc_get_heap_size: takes no arguments.
 * NOTE(review): the body is not visible in this extract; presumably it returns
 * the total size of the managed heap -- confirm.
 */
5299 mono_gc_get_heap_size (void)
/*
 * mono_gc_disable: takes no arguments.
 * NOTE(review): the body is not visible in this extract; presumably the
 * counterpart of mono_gc_enable () -- confirm what state it changes.
 */
5305 mono_gc_disable (void)
/*
 * mono_gc_enable: takes no arguments.
 * NOTE(review): the body is not visible in this extract; presumably the
 * counterpart of mono_gc_disable () -- confirm what state it changes.
 */
5313 mono_gc_enable (void)
/*
 * mono_object_is_alive: takes the object to query.
 * NOTE(review): the body is not visible in this extract, so the liveness rule
 * it applies cannot be stated from here -- confirm against the full source.
 */
5321 mono_object_is_alive (MonoObject* o)
/*
 * mono_gc_get_generation: classify `obj` by testing ptr_in_nursery (obj).
 * NOTE(review): both return statements are not visible in this extract;
 * presumably nursery objects report generation 0 and all others a higher
 * generation -- confirm.
 */
5327 mono_gc_get_generation (MonoObject *obj)
5329 if (ptr_in_nursery (obj))
/*
 * mono_gc_enable_events: takes no arguments.
 * NOTE(review): the body is not visible in this extract (it may be empty) --
 * confirm.
 */
5335 mono_gc_enable_events (void)
/*
 * mono_gc_weak_link_add: register `link_addr` as a disappearing link for
 * `obj` by forwarding both to mono_gc_register_disappearing_link ().
 * Note the argument order flips: the callee takes the object first.
 */
5340 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
5342 mono_gc_register_disappearing_link (obj, link_addr);
/*
 * mono_gc_weak_link_remove: call mono_gc_register_disappearing_link () for
 * `link_addr` with a NULL object.
 * NOTE(review): presumably a NULL object makes the callee drop the link --
 * confirm against mono_gc_register_disappearing_link ().
 */
5346 mono_gc_weak_link_remove (void **link_addr)
5348 mono_gc_register_disappearing_link (NULL, link_addr);
/*
 * mono_gc_weak_link_get: decode the value stored at `link_addr` with
 * REVEAL_POINTER () and test it against HIDE_POINTER (NULL).
 * NOTE(review): the statements after the test are not visible in this extract;
 * presumably NULL is returned when the test matches and `obj` otherwise.
 * The test compares a revealed pointer with a hidden NULL, which only
 * identifies an all-zero slot if HIDE_POINTER and REVEAL_POINTER are the same
 * self-inverse transformation -- confirm against their definitions.
 */
5352 mono_gc_weak_link_get (void **link_addr)
5354 MonoObject *obj = REVEAL_POINTER (*link_addr);
5356 if (obj == HIDE_POINTER (NULL))
/*
 * mono_gc_make_descr_from_bitmap: build a root descriptor from a bitmap.
 * @bitmap: bitmap words; only bitmap [0] is used for the inline form.
 * @numbits: number of significant bits in @bitmap.
 * When @numbits is smaller than the bit width of one bitmap word minus
 * ROOT_DESC_TYPE_SHIFT, the first word is packed into a ROOT_DESC_BITMAP
 * descriptor.  The other visible path stores the bitmap through
 * alloc_complex_descriptor (bitmap, numbits + 1) and wraps the result in a
 * ROOT_DESC_COMPLEX descriptor.
 * NOTE(review): the line separating the two paths (presumably `} else {`) is
 * not visible in this extract.  The size test compares an int with a size_t
 * expression, so a negative @numbits would be converted to a large unsigned
 * value and take the complex path -- confirm callers never pass one.
 */
5362 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
5364 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
5365 return (void*)MAKE_ROOT_DESC (ROOT_DESC_BITMAP, bitmap [0]);
5367 mword complex = alloc_complex_descriptor (bitmap, numbits + 1);
5368 return (void*)MAKE_ROOT_DESC (ROOT_DESC_COMPLEX, complex);
/*
 * mono_gc_make_root_descr_user: allocate a ROOT_DESC_USER descriptor for the
 * mark callback `marker`.
 * The descriptor encodes the current value of user_descriptors_next; `marker`
 * is then stored in that slot of user_descriptors and the index is advanced.
 * The assert fires once MAX_USER_DESCRIPTORS slots have been handed out.
 * NOTE(review): the declaration of `descr` and the return statement are not
 * visible in this extract; presumably `descr` is returned -- confirm.
 */
5373 mono_gc_make_root_descr_user (MonoGCMarkFunc marker)
5377 g_assert (user_descriptors_next < MAX_USER_DESCRIPTORS);
5378 descr = (void*)MAKE_ROOT_DESC (ROOT_DESC_USER, (mword)user_descriptors_next);
5379 user_descriptors [user_descriptors_next ++] = marker;
/*
 * mono_gc_alloc_fixed: obtain `size` zero-filled bytes from calloc () and
 * register the block as a GC root described by `descr` through
 * mono_gc_register_root ().
 * NOTE(review): whether the calloc () result is checked, what happens when
 * registration fails, and the return statement are not visible in this
 * extract -- confirm that a failed registration releases `res`.
 */
5385 mono_gc_alloc_fixed (size_t size, void *descr)
5387 /* FIXME: do a single allocation */
5388 void *res = calloc (1, size);
5391 if (!mono_gc_register_root (res, size, descr)) {
/*
 * mono_gc_free_fixed: remove `addr` from the registered roots through
 * mono_gc_deregister_root ().
 * NOTE(review): no release of the memory itself is visible in this extract;
 * presumably the block obtained in mono_gc_alloc_fixed () is freed after
 * deregistration -- confirm.
 */
5399 mono_gc_free_fixed (void* addr)
5401 mono_gc_deregister_root (addr);
/*
 * mono_gc_is_gc_thread: `result` becomes true when thread_info_lookup ()
 * finds an entry for the calling thread (ARCH_GET_THREAD ()).
 * NOTE(review): the declaration of `result`, any locking around the lookup
 * and the return statement are not visible in this extract -- confirm.
 */
5406 mono_gc_is_gc_thread (void)
5410 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
/*
 * mono_gc_base_init: one-time collector set-up.  Visible steps, in order:
 *  - initialise gc_mutex and test gc_initialized (the guarded statements are
 *    not visible in this extract; presumably an early return -- confirm);
 *  - cache the page size and default the debug output to stderr;
 *  - parse the MONO_GC_DEBUG environment variable;
 *  - initialise suspend_ack_semaphore and install suspend_handler and
 *    restart_handler for suspend_signal_num and restart_signal_num;
 *  - build suspend_signal_mask (every signal except restart_signal_num);
 *  - allocate global_remset and create the remembered_set_key TLS key;
 *  - set gc_initialized and register the calling thread.
 */
5416 mono_gc_base_init (void)
5420 struct sigaction sinfo;
5422 LOCK_INIT (gc_mutex);
5424 if (gc_initialized) {
5428 pagesize = mono_pagesize ();
5429 gc_debug_file = stderr;
/*
 * MONO_GC_DEBUG is split on ','.  An entry starting with a digit sets
 * gc_debug_level through atoi (); the debug file name is built as
 * "<opt>.<pid>" and opened for writing, with stderr as the fallback.
 * The two named options set their respective flags; the fprintf calls below
 * report an unrecognised entry.
 * NOTE(review): the lines that advance `opt` past the level digit, test the
 * fopen () result, free `rf` / `opts` and follow the usage message are not
 * visible in this extract -- confirm.
 */
5430 if ((env = getenv ("MONO_GC_DEBUG"))) {
5431 opts = g_strsplit (env, ",", -1);
5432 for (ptr = opts; ptr && *ptr; ptr ++) {
5434 if (opt [0] >= '0' && opt [0] <= '9') {
5435 gc_debug_level = atoi (opt);
5440 char *rf = g_strdup_printf ("%s.%d", opt, getpid ());
5441 gc_debug_file = fopen (rf, "wb");
5443 gc_debug_file = stderr;
5446 } else if (!strcmp (opt, "collect-before-allocs")) {
5447 collect_before_allocs = TRUE;
5448 } else if (!strcmp (opt, "check-at-minor-collections")) {
5449 consistency_check_at_minor_collection = TRUE;
5451 fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env);
5452 fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|<option>]+ where l is a debug level 0-9.\n");
5453 fprintf (stderr, "Valid options are: collect-before-allocs, check-at-minor-collections.\n");
5460 sem_init (&suspend_ack_semaphore, 0, 0);
/*
 * Both handlers run with every signal blocked (sa_mask is filled).
 * NOTE(review): SA_SIGINFO is requested but the handlers are stored through
 * sa_handler; POSIX specifies sa_sigaction as the member used when SA_SIGINFO
 * is set -- confirm the handler prototypes and which member should be
 * assigned.
 */
5462 sigfillset (&sinfo.sa_mask);
5463 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
5464 sinfo.sa_handler = suspend_handler;
5465 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
5466 g_error ("failed sigaction");
/* the same sinfo (mask and flags) is reused for the restart signal */
5469 sinfo.sa_handler = restart_handler;
5470 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
5471 g_error ("failed sigaction");
/* mask with every signal except the restart signal */
5474 sigfillset (&suspend_signal_mask);
5475 sigdelset (&suspend_signal_mask, restart_signal_num);
5477 global_remset = alloc_remset (1024, NULL);
5478 global_remset->next = NULL;
/* unregister_thread is installed as the key's destructor */
5480 pthread_key_create (&remembered_set_key, unregister_thread);
5481 gc_initialized = TRUE;
/*
 * NOTE(review): the address of the local `sinfo` is passed; presumably it
 * serves as a marker for the current stack position -- confirm against
 * mono_gc_register_thread ().
 */
5483 mono_gc_register_thread (&sinfo);
5491 /* FIXME: Do this in the JIT, where specialized allocation sequences can be created
5492 * for each class. This is currently not easy to do, as it is hard to generate basic
5493 * blocks + branches, but it is easy with the linear IL codebase.
/*
 * create_allocator: build, with the MonoMethodBuilder API, the IL wrapper
 * "Alloc" (MONO_WRAPPER_ALLOC) that bump-allocates an object.
 * @atype: allocator kind; only ATYPE_NORMAL is accepted (asserted).
 * The generated method takes one native-int argument, used as a MonoVTable
 * pointer, and returns an object.  It reads klass->instance_size, rounds it
 * up to ALLOC_ALIGN, advances tlab_next through its address held in a TLS
 * variable, and either returns the old tlab_next with the vtable stored in
 * its first word or calls mono_gc_alloc_obj ().
 * Both TLS offsets must be available (asserted).
 * NOTE(review): the declaration of `res`, the test around the icall
 * registration (presumably on `registered`) and the return statement are not
 * visible in this extract -- confirm.
 */
5496 create_allocator (int atype)
5498 int tlab_next_addr_offset = -1;
5499 int tlab_temp_end_offset = -1;
5500 int p_var, size_var, tlab_next_addr_var, new_next_var;
5501 guint32 slowpath_branch;
5502 MonoMethodBuilder *mb;
5504 MonoMethodSignature *csig;
5505 static gboolean registered = FALSE;
5507 MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
5508 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5510 g_assert (tlab_next_addr_offset != -1);
5511 g_assert (tlab_temp_end_offset != -1);
5513 g_assert (atype == ATYPE_NORMAL);
5516 mono_register_jit_icall (mono_gc_alloc_obj, "mono_gc_alloc_obj", mono_create_icall_signature ("object ptr int"), FALSE);
/* wrapper signature: object Alloc (native int) */
5520 csig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
5521 csig->ret = &mono_defaults.object_class->byval_arg;
5522 csig->params [0] = &mono_defaults.int_class->byval_arg;
5524 mb = mono_mb_new (mono_defaults.object_class, "Alloc", MONO_WRAPPER_ALLOC);
5525 size_var = mono_mb_add_local (mb, &mono_defaults.int32_class->byval_arg);
5526 /* size = vtable->klass->instance_size; */
5527 mono_mb_emit_ldarg (mb, 0);
5528 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoVTable, klass));
5529 mono_mb_emit_byte (mb, CEE_ADD);
5530 mono_mb_emit_byte (mb, CEE_LDIND_I);
5531 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoClass, instance_size));
5532 mono_mb_emit_byte (mb, CEE_ADD);
5533 /* FIXME: assert instance_size stays a 4 byte integer */
5534 mono_mb_emit_byte (mb, CEE_LDIND_U4);
5535 mono_mb_emit_stloc (mb, size_var);
5537 /* size += ALLOC_ALIGN - 1; */
5538 mono_mb_emit_ldloc (mb, size_var);
5539 mono_mb_emit_icon (mb, ALLOC_ALIGN - 1);
5540 mono_mb_emit_byte (mb, CEE_ADD);
5541 /* size &= ~(ALLOC_ALIGN - 1); */
5542 mono_mb_emit_icon (mb, ~(ALLOC_ALIGN - 1));
5543 mono_mb_emit_byte (mb, CEE_AND);
5544 mono_mb_emit_stloc (mb, size_var);
5547 * We need to modify tlab_next, but the JIT only supports reading, so we read
5548 * another tls var holding its address instead.
5551 /* tlab_next_addr (local) = tlab_next_addr (TLS var) */
5552 tlab_next_addr_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5553 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5554 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5555 mono_mb_emit_i4 (mb, tlab_next_addr_offset);
5556 mono_mb_emit_stloc (mb, tlab_next_addr_var);
5558 /* p = (void**)tlab_next; */
5559 p_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5560 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5561 mono_mb_emit_byte (mb, CEE_LDIND_I);
5562 mono_mb_emit_stloc (mb, p_var);
5564 /* new_next = (char*)p + size; */
5565 new_next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5566 mono_mb_emit_ldloc (mb, p_var);
5567 mono_mb_emit_ldloc (mb, size_var);
5568 mono_mb_emit_byte (mb, CEE_CONV_I);
5569 mono_mb_emit_byte (mb, CEE_ADD);
5570 mono_mb_emit_stloc (mb, new_next_var);
/*
 * NOTE(review): tlab_next is written before the limit test below, so the
 * slow path is entered with tlab_next already advanced -- confirm that
 * mono_gc_alloc_obj () tolerates this.
 */
5572 /* tlab_next = new_next */
5573 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5574 mono_mb_emit_ldloc (mb, new_next_var);
5575 mono_mb_emit_byte (mb, CEE_STIND_I);
5577 /* if (G_LIKELY (new_next < tlab_temp_end)) */
5578 mono_mb_emit_ldloc (mb, new_next_var);
5579 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5580 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5581 mono_mb_emit_i4 (mb, tlab_temp_end_offset);
5582 slowpath_branch = mono_mb_emit_short_branch (mb, MONO_CEE_BLT_UN_S);
/* branch not taken: hand the vtable and the aligned size to mono_gc_alloc_obj () */
5586 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5587 mono_mb_emit_byte (mb, CEE_MONO_NOT_TAKEN);
5589 /* FIXME: mono_gc_alloc_obj takes a 'size_t' as an argument, not an int32 */
5590 mono_mb_emit_ldarg (mb, 0);
5591 mono_mb_emit_ldloc (mb, size_var);
5592 mono_mb_emit_icall (mb, mono_gc_alloc_obj);
5593 mono_mb_emit_byte (mb, CEE_RET);
/* branch target: the bump fitted below tlab_temp_end */
5596 mono_mb_patch_short_branch (mb, slowpath_branch);
5598 /* FIXME: Memory barrier */
/* store the vtable pointer (argument 0) into the first word at p */
5601 mono_mb_emit_ldloc (mb, p_var);
5602 mono_mb_emit_ldarg (mb, 0);
5603 mono_mb_emit_byte (mb, CEE_STIND_I);
/* the wrapper returns p */
5606 mono_mb_emit_ldloc (mb, p_var);
5607 mono_mb_emit_byte (mb, CEE_RET);
5609 res = mono_mb_create_method (mb, csig, 8);
/* locals are not zero-initialised; each one above is stored before it is loaded */
5611 mono_method_get_header (res)->init_locals = FALSE;
/*
 * Allocator wrappers indexed by allocator type (ATYPE_NUM entries); read and
 * filled by mono_gc_get_managed_allocator_by_type ().
 */
5615 static MonoMethod* alloc_method_cache [ATYPE_NUM];
5618 * Generate an allocator method implementing the fast path of mono_gc_alloc_obj ().
5619 * The signature of the called method is:
5620 * object allocate (MonoVTable *vtable)
/*
 * mono_gc_get_managed_allocator: pick the managed allocator wrapper for
 * `vtable`, delegating to mono_gc_get_managed_allocator_by_type (0) when none
 * of the tests below matches.
 * The tests cover: a missing TLS offset for tlab_next or tlab_temp_end, an
 * instance size above tlab_size, classes with a finalizer or marshal-by-ref
 * semantics, allocation profiling being enabled, string classes, and the
 * collect_before_allocs debug flag.
 * `for_box` is not used by the lines visible here.
 * NOTE(review): the statements guarded by each test are not visible in this
 * extract; presumably each returns NULL so the caller uses the unmanaged
 * allocation path -- confirm.  The literal 0 presumably stands for
 * ATYPE_NORMAL -- confirm.
 */
5623 mono_gc_get_managed_allocator (MonoVTable *vtable, gboolean for_box)
5625 int tlab_next_offset = -1;
5626 int tlab_temp_end_offset = -1;
5627 MonoClass *klass = vtable->klass;
5628 MONO_THREAD_VAR_OFFSET (tlab_next, tlab_next_offset);
5629 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5631 if (tlab_next_offset == -1 || tlab_temp_end_offset == -1)
5633 if (klass->instance_size > tlab_size)
5635 if (klass->has_finalize || klass->marshalbyref || (mono_profiler_get_events () & MONO_PROFILE_ALLOCATIONS))
5639 if (klass->byval_arg.type == MONO_TYPE_STRING)
5641 if (collect_before_allocs)
5644 return mono_gc_get_managed_allocator_by_type (0);
/*
 * mono_gc_get_managed_allocator_type: takes an allocator wrapper method.
 * NOTE(review): the body is not visible in this extract; presumably it maps
 * `managed_alloc` back to its allocator type index -- confirm.
 */
5648 mono_gc_get_managed_allocator_type (MonoMethod *managed_alloc)
/*
 * mono_gc_get_managed_allocator_by_type: fetch the wrapper cached for `atype`
 * in alloc_method_cache, building it with create_allocator () and storing it
 * in the cache.  The cache access happens between mono_loader_lock () and
 * mono_loader_unlock ().
 * NOTE(review): the declaration of `res`, the test that decides whether
 * create_allocator () runs (presumably `if (!res)`) and the return statement
 * are not visible in this extract -- confirm.  No bounds check on `atype` is
 * visible either.
 */
5654 mono_gc_get_managed_allocator_by_type (int atype)
5658 mono_loader_lock ();
5659 res = alloc_method_cache [atype];
5661 res = alloc_method_cache [atype] = create_allocator (atype);
5662 mono_loader_unlock ();
/*
 * mono_gc_get_managed_allocator_types: takes no arguments.
 * NOTE(review): the body is not visible in this extract; presumably it reports
 * the number of allocator types (compare ATYPE_NUM) -- confirm.
 */
5667 mono_gc_get_managed_allocator_types (void)
/*
 * Cached write barrier wrapper; tested, assigned and returned by
 * mono_gc_get_write_barrier ().
 */
5672 static MonoMethod *write_barrier_method;
/*
 * mono_gc_get_write_barrier: return the IL wrapper "wbarrier"
 * (MONO_WRAPPER_WRITE_BARRIER), building and caching it in
 * write_barrier_method on first use.
 * The generated method has the signature void (native int ptr, object value):
 *  - when ptr lies inside the nursery, value is stored and the method returns;
 *  - otherwise, if the thread's remembered set has room, ptr is appended to
 *    it, store_next is advanced by one pointer, and value is stored;
 *  - otherwise mono_gc_wbarrier_generic_store (ptr, value) is called.
 * NOTE(review): the declarations of `res`, `label1` and `label2`, the
 * preprocessor lines that close the ALIGN_NURSERY block, and the branch
 * between freeing `res` and publishing it are not visible in this extract --
 * confirm.
 */
5675 mono_gc_get_write_barrier (void)
5678 int remset_offset = -1;
5679 int remset_var, next_var;
5680 MonoMethodBuilder *mb;
5681 MonoMethodSignature *sig;
5684 MONO_THREAD_VAR_OFFSET (remembered_set, remset_offset);
5686 // FIXME: Maybe create a separate version for ctors (the branch would be
5687 // correctly predicted more times)
5688 if (write_barrier_method)
5689 return write_barrier_method;
5691 /* Create the IL version of mono_gc_barrier_generic_store () */
5692 sig = mono_metadata_signature_alloc (mono_defaults.corlib, 2);
5693 sig->ret = &mono_defaults.void_class->byval_arg;
5694 sig->params [0] = &mono_defaults.int_class->byval_arg;
5695 sig->params [1] = &mono_defaults.object_class->byval_arg;
5697 mb = mono_mb_new (mono_defaults.object_class, "wbarrier", MONO_WRAPPER_WRITE_BARRIER);
5699 /* ptr_in_nursery () check */
5700 #ifdef ALIGN_NURSERY
5702 * Masking out the bits might be faster, but we would have to use 64 bit
5703 * immediates, which might be slower.
/*
 * Compare (ptr >> DEFAULT_NURSERY_BITS) with the shifted nursery base; the
 * branch to label1 is taken when they differ.  The base is emitted as an
 * immediate, i.e. the value nursery_start has when the wrapper is built.
 */
5705 mono_mb_emit_ldarg (mb, 0);
5706 mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS);
5707 mono_mb_emit_byte (mb, CEE_SHR_UN);
5708 mono_mb_emit_icon (mb, (mword)nursery_start >> DEFAULT_NURSERY_BITS);
5709 label1 = mono_mb_emit_branch (mb, CEE_BNE_UN);
5712 g_assert_not_reached ();
5715 /* Don't need write barrier case */
5716 /* do the assignment */
5717 mono_mb_emit_ldarg (mb, 0);
5718 mono_mb_emit_ldarg (mb, 1);
5719 /* Don't use STIND_REF, as it would cause infinite recursion */
5720 mono_mb_emit_byte (mb, CEE_STIND_I);
5721 mono_mb_emit_byte (mb, CEE_RET);
5723 /* Need write barrier case */
5724 mono_mb_patch_branch (mb, label1);
5726 if (remset_offset == -1)
5728 g_assert_not_reached ();
5730 // remset_var = remembered_set;
5731 remset_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5732 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5733 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5734 mono_mb_emit_i4 (mb, remset_offset);
5735 mono_mb_emit_stloc (mb, remset_var);
5737 // next_var = rs->store_next
5738 next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5739 mono_mb_emit_ldloc (mb, remset_var);
5740 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, store_next));
5741 mono_mb_emit_byte (mb, CEE_LDIND_I);
5742 mono_mb_emit_stloc (mb, next_var);
5744 // if (rs->store_next < rs->end_set) {
/*
 * NOTE(review): CEE_BGE is the signed comparison, while both operands are
 * addresses; consider whether the unsigned form (CEE_BGE_UN) was intended.
 */
5745 mono_mb_emit_ldloc (mb, next_var);
5746 mono_mb_emit_ldloc (mb, remset_var);
5747 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, end_set));
5748 mono_mb_emit_byte (mb, CEE_LDIND_I);
5749 label2 = mono_mb_emit_branch (mb, CEE_BGE);
5751 /* write barrier fast path */
5752 // *(rs->store_next++) = (mword)ptr;
5753 mono_mb_emit_ldloc (mb, next_var);
5754 mono_mb_emit_ldarg (mb, 0);
5755 mono_mb_emit_byte (mb, CEE_STIND_I);
/* advance the local copy by one pointer, then write it back to rs->store_next */
5757 mono_mb_emit_ldloc (mb, next_var);
5758 mono_mb_emit_icon (mb, sizeof (gpointer));
5759 mono_mb_emit_byte (mb, CEE_ADD);
5760 mono_mb_emit_stloc (mb, next_var);
5762 mono_mb_emit_ldloc (mb, remset_var);
5763 mono_mb_emit_ldflda (mb, G_STRUCT_OFFSET (RememberedSet, store_next));
5764 mono_mb_emit_ldloc (mb, next_var);
5765 mono_mb_emit_byte (mb, CEE_STIND_I);
5767 // *(void**)ptr = value;
5768 mono_mb_emit_ldarg (mb, 0);
5769 mono_mb_emit_ldarg (mb, 1);
5770 mono_mb_emit_byte (mb, CEE_STIND_I);
5771 mono_mb_emit_byte (mb, CEE_RET);
5773 /* write barrier slow path */
5774 mono_mb_patch_branch (mb, label2);
5776 mono_mb_emit_ldarg (mb, 0);
5777 mono_mb_emit_ldarg (mb, 1);
5778 mono_mb_emit_icall (mb, mono_gc_wbarrier_generic_store);
5779 mono_mb_emit_byte (mb, CEE_RET);
5781 res = mono_mb_create_method (mb, sig, 16);
/*
 * Publish under the loader lock: if write_barrier_method was set in the
 * meantime the freshly built `res` is freed, otherwise `res` is stored after
 * a memory barrier.
 */
5784 mono_loader_lock ();
5785 if (write_barrier_method) {
5786 /* Already created */
5787 mono_free_method (res);
5789 /* double-checked locking */
5790 mono_memory_barrier ();
5791 write_barrier_method = res;
5793 mono_loader_unlock ();
5795 return write_barrier_method;
5798 #endif /* HAVE_SGEN_GC */