2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright (C) 2005-2006 Novell, Inc
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation provides always zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianess issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list is part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "metadata/method-builder.h"
148 #include "metadata/profiler-private.h"
149 #include "utils/mono-mmap.h"
151 #ifdef HAVE_VALGRIND_MEMCHECK_H
152 #include <valgrind/memcheck.h>
155 #define OPDEF(a,b,c,d,e,f,g,h,i,j) \
159 #include "mono/cil/opcode.def"
166 * ######################################################################
167 * ######## Types and constants used by the GC.
168 * ######################################################################
170 #if SIZEOF_VOID_P == 4
171 typedef guint32 mword;
173 typedef guint64 mword;
176 static int gc_initialized = 0;
177 static int gc_debug_level = 0;
178 static FILE* gc_debug_file;
179 /* If set, do a minor collection before every allocation */
180 static gboolean collect_before_allocs = FALSE;
181 /* If set, do a heap consistency check before each minor collection */
182 static gboolean consistency_check_at_minor_collection = FALSE;
185 mono_gc_flush_info (void)
187 fflush (gc_debug_file);
190 #define MAX_DEBUG_LEVEL 9
191 #define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0)
193 #define TV_DECLARE(name) struct timeval name
194 #define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
195 #define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec)
197 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
207 /* each request from the OS ends up in a GCMemSection */
208 typedef struct _GCMemSection GCMemSection;
209 struct _GCMemSection {
213 /* pointer where more data could be allocated if it fits */
217 * scan starts is an array of pointers to objects equally spaced in the allocation area
218 * They let use quickly find pinned objects from pinning pointers.
221 /* in major collections indexes in the pin_queue for objects that pin this section */
224 unsigned short num_scan_start;
228 /* large object space struct: 64+ KB */
229 /* we could make this limit much smaller to avoid memcpy copy
230 * and potentially have more room in the GC descriptor: need to measure
231 * This also means that such small OS objects will need to be
232 * allocated in a different way (using pinned chunks).
233 * We may want to put large but smaller than 64k objects in the fixed space
234 * when we move the object from one generation to another (to limit the
235 * pig in the snake effect).
236 * Note: it may be worth to have an optimized copy function, since we can
237 * assume that objects are aligned and have a multiple of 8 size.
238 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
239 * true if MONO_ZERO_LEN_ARRAY is nonzero.
241 typedef struct _LOSObject LOSObject;
244 mword size; /* this is the object size */
245 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
248 char data [MONO_ZERO_LEN_ARRAY];
251 /* Pinned objects are allocated in the LOS space if bigger than half a page
252 * or from freelists otherwise. We assume that pinned objects are relatively few
253 * and they have a slow dying speed (like interned strings, thread objects).
254 * As such they will be collected only at major collections.
255 * free lists are not global: when we need memory we allocate a PinnedChunk.
256 * Each pinned chunk is made of several pages, the first of wich is used
257 * internally for bookeeping (here think of a page as 4KB). The bookeeping
258 * includes the freelists vectors and info about the object size of each page
259 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
260 * a size is assigned to it, the page is divided in the proper chunks and each
261 * chunk is added to the freelist. To not waste space, the remaining space in the
262 * first page is used as objects of size 16 or 32 (need to measure which are more
264 * We use this same structure to allocate memory used internally by the GC, so
265 * we never use malloc/free if we need to alloc during collection: the world is stopped
266 * and malloc/free will deadlock.
267 * When we want to iterate over pinned objects, we just scan a page at a time
268 * linearly according to the size of objects in the page: the next pointer used to link
269 * the items in the freelist uses the same word as the vtable. Since we keep freelists
270 * for each pinned chunk, if the word points outside the pinned chunk it means
272 * We could avoid this expensive scanning in creative ways. We could have a policy
273 * of putting in the pinned space only objects we know about that have no struct fields
274 * with references and we can easily use a even expensive write barrier for them,
275 * since pointer writes on such objects should be rare.
276 * The best compromise is to just alloc interned strings and System.MonoType in them.
277 * It would be nice to allocate MonoThread in it, too: must check that we properly
278 * use write barriers so we don't have to do any expensive scanning of the whole pinned
279 * chunk list during minor collections. We can avoid it now because we alloc in it only
280 * reference-free objects.
282 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
283 #define MAX_FREELIST_SIZE 2048
284 #define PINNED_PAGE_SIZE (4096)
285 #define PINNED_CHUNK_MIN_SIZE (4096*8)
286 typedef struct _PinnedChunk PinnedChunk;
287 struct _PinnedChunk {
290 int *page_sizes; /* a 0 means the page is still unused */
293 void *data [1]; /* page sizes and free lists are stored here */
296 /* The method used to clear the nursery */
297 /* Clearing at nursery collections is the safest, but has bad interactions with caches.
298 * Clearing at TLAB creation is much faster, but more complex and it might expose hard
303 CLEAR_AT_TLAB_CREATION
304 } NurseryClearPolicy;
306 static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
309 * The young generation is divided into fragments. This is because
310 * we can hand one fragments to a thread for lock-less fast alloc and
311 * because the young generation ends up fragmented anyway by pinned objects.
312 * Once a collection is done, a list of fragments is created. When doing
313 * thread local alloc we use smallish nurseries so we allow new threads to
314 * allocate memory from gen0 without triggering a collection. Threads that
315 * are found to allocate lots of memory are given bigger fragments. This
316 * should make the finalizer thread use little nursery memory after a while.
317 * We should start assigning threads very small fragments: if there are many
318 * threads the nursery will be full of reserved space that the threads may not
319 * use at all, slowing down allocation speed.
320 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
321 * Allocation Buffers (TLABs).
323 typedef struct _Fragment Fragment;
327 char *fragment_start;
328 char *fragment_limit; /* the current soft limit for allocation */
332 /* the runtime can register areas of memory as roots: we keep two lists of roots,
333 * a pinned root set for conservatively scanned roots and a normal one for
334 * precisely scanned roots (currently implemented as a single list).
336 typedef struct _RootRecord RootRecord;
344 /* for use with write barriers */
345 typedef struct _RememberedSet RememberedSet;
346 struct _RememberedSet {
350 mword data [MONO_ZERO_LEN_ARRAY];
353 /* we have 4 possible values in the low 2 bits */
355 REMSET_LOCATION, /* just a pointer to the exact location */
356 REMSET_RANGE, /* range of pointer fields */
357 REMSET_OBJECT, /* mark all the object for scanning */
358 REMSET_VTYPE, /* a valuetype described by a gc descriptor */
359 REMSET_TYPE_MASK = 0x3
362 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
363 static pthread_key_t remembered_set_key;
364 static RememberedSet *global_remset;
365 static int store_to_global_remset = 0;
367 /* FIXME: later choose a size that takes into account the RememberedSet struct
368 * and doesn't waste any alloc paddin space.
370 #define DEFAULT_REMSET_SIZE 1024
371 static RememberedSet* alloc_remset (int size, gpointer id);
373 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
374 * no cast from a pointer to an integer
381 /* these bits are set in the object vtable: we could merge them since an object can be
382 * either pinned or forwarded but not both.
383 * We store them in the vtable slot because the bits are used in the sync block for
384 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
385 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
386 * would be an invalid combination for the monitor and hash code).
387 * The values are already shifted.
388 * The forwarding address is stored in the sync block.
390 #define FORWARDED_BIT 1
392 #define VTABLE_BITS_MASK 0x3
394 /* returns NULL if not forwarded, or the forwarded address */
395 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
396 /* set the forwarded address fw_addr for object obj */
397 #define forward_object(obj,fw_addr) do { \
398 ((mword*)(obj))[0] |= FORWARDED_BIT; \
399 ((mword*)(obj))[1] = (mword)(fw_addr); \
402 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
403 #define pin_object(obj) do { \
404 ((mword*)(obj))[0] |= PINNED_BIT; \
406 #define unpin_object(obj) do { \
407 ((mword*)(obj))[0] &= ~PINNED_BIT; \
412 * Since we set bits in the vtable, use the macro to load it from the pointer to
413 * an object that is potentially pinned.
415 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
418 safe_name (void* obj)
420 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
421 return vt->klass->name;
425 safe_object_get_size (MonoObject* o)
427 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
428 if (klass == mono_defaults.string_class) {
429 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
430 } else if (klass->rank) {
431 MonoArray *array = (MonoArray*)o;
432 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
436 size += sizeof (MonoArrayBounds) * klass->rank;
440 /* from a created object: the class must be inited already */
441 return klass->instance_size;
445 static inline gboolean
446 is_maybe_half_constructed (MonoObject *o)
450 klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
451 if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) ||
452 (klass->rank && mono_array_length ((MonoArray*)o) == 0))
459 * ######################################################################
460 * ######## Global data.
461 * ######################################################################
463 static LOCK_DECLARE (gc_mutex);
464 static int gc_disabled = 0;
465 static int num_minor_gcs = 0;
466 static int num_major_gcs = 0;
468 /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */
469 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
470 #define DEFAULT_NURSERY_SIZE (1024*512*2)
471 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
472 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
473 /* to quickly find the head of an object pinned by a conservative address
474 * we keep track of the objects allocated for each SCAN_START_SIZE memory
475 * chunk in the nursery or other memory sections. Larger values have less
476 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
478 #define SCAN_START_SIZE (4096*2)
479 /* the minimum size of a fragment that we consider useful for allocation */
480 #define FRAGMENT_MIN_SIZE (512)
481 /* This is a fixed value used for pinned chunks, not the system pagesize */
482 #define FREELIST_PAGESIZE 4096
484 static mword pagesize = 4096;
485 static mword nursery_size = DEFAULT_NURSERY_SIZE;
486 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
487 static mword max_section_size = DEFAULT_MAX_SECTION;
488 static int section_size_used = 0;
489 static int degraded_mode = 0;
491 static LOSObject *los_object_list = NULL;
492 static mword los_memory_usage = 0;
493 static mword los_num_objects = 0;
494 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
495 static mword total_alloc = 0;
496 /* use this to tune when to do a major/minor collection */
497 static mword memory_pressure = 0;
499 static GCMemSection *section_list = NULL;
500 static GCMemSection *nursery_section = NULL;
501 static mword lowest_heap_address = ~(mword)0;
502 static mword highest_heap_address = 0;
504 typedef struct _FinalizeEntry FinalizeEntry;
505 struct _FinalizeEntry {
508 void *data; /* can be a disappearing link or the data for the finalizer */
509 /* Note we could use just one pointer if we don't support multiple callbacks
510 * for finalizers and per-finalizer data and if we store the obj pointers
511 * in the link like libgc does
516 * The finalizable hash has the object as the key, the
517 * disappearing_link hash, has the link address as key.
519 static FinalizeEntry **finalizable_hash = NULL;
520 /* objects that are ready to be finalized */
521 static FinalizeEntry *fin_ready_list = NULL;
522 /* disappearing links use the same structure but a different list */
523 static FinalizeEntry **disappearing_link_hash = NULL;
524 static mword disappearing_link_hash_size = 0;
525 static mword finalizable_hash_size = 0;
527 static int num_registered_finalizers = 0;
528 static int num_ready_finalizers = 0;
529 static int num_disappearing_links = 0;
530 static int no_finalize = 0;
532 /* keep each size a multiple of ALLOC_ALIGN */
533 /* on 64 bit systems 8 is likely completely unused. */
534 static const int freelist_sizes [] = {
535 8, 16, 24, 32, 40, 48, 64, 80,
536 96, 128, 160, 192, 224, 256, 320, 384,
537 448, 512, 584, 680, 816, 1024, 1360, 2048};
538 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
540 static char* max_pinned_chunk_addr = NULL;
541 static char* min_pinned_chunk_addr = (char*)-1;
542 /* pinned_chunk_list is used for allocations of objects that are never moved */
543 static PinnedChunk *pinned_chunk_list = NULL;
544 /* internal_chunk_list is used for allocating structures needed by the GC */
545 static PinnedChunk *internal_chunk_list = NULL;
548 obj_is_from_pinned_alloc (char *p)
550 PinnedChunk *chunk = pinned_chunk_list;
551 for (; chunk; chunk = chunk->next) {
552 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
558 /* registered roots: the key to the hash is the root start address */
559 static RootRecord **roots_hash = NULL;
560 static int roots_hash_size = 0;
561 static mword roots_size = 0; /* amount of memory in the root set */
562 static int num_roots_entries = 0;
565 * The current allocation cursors
566 * We allocate objects in the nursery.
567 * The nursery is the area between nursery_start and nursery_real_end.
568 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
569 * from nursery fragments.
570 * tlab_next is the pointer to the space inside the TLAB where the next object will
572 * tlab_temp_end is the pointer to the end of the temporary space reserved for
573 * the allocation: it allows us to set the scan starts at reasonable intervals.
574 * tlab_real_end points to the end of the TLAB.
575 * nursery_frag_real_end points to the end of the currently used nursery fragment.
576 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
577 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
578 * At the next allocation, the area of the nursery where objects can be present is
579 * between MIN(nursery_first_pinned_start, first_fragment_start) and
580 * MAX(nursery_last_pinned_end, nursery_frag_real_end)
582 static char *nursery_start = NULL;
585 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
586 * variables for next+temp_end ?
588 static __thread char *tlab_start;
589 static __thread char *tlab_next;
590 static __thread char *tlab_temp_end;
591 static __thread char *tlab_real_end;
592 /* Used by the managed allocator */
593 static __thread char **tlab_next_addr;
594 static char *nursery_next = NULL;
595 static char *nursery_frag_real_end = NULL;
596 static char *nursery_real_end = NULL;
597 static char *nursery_first_pinned_start = NULL;
598 static char *nursery_last_pinned_end = NULL;
600 /* The size of a TLAB */
601 /* The bigger the value, the less often we have to go to the slow path to allocate a new
602 * one, but the more space is wasted by threads not allocating much memory.
604 * FIXME: Make this self-tuning for each thread.
606 static guint32 tlab_size = (1024 * 4);
608 /* fragments that are free and ready to be used for allocation */
609 static Fragment *nursery_fragments = NULL;
610 /* freeelist of fragment structures */
611 static Fragment *fragment_freelist = NULL;
614 * used when moving the objects
615 * When the nursery is collected, objects are copied to to_space.
616 * The area between to_space and gray_objects is used as a stack
617 * of objects that need their fields checked for more references
619 * We should optimize somehow this mechanism to avoid rescanning
620 * ptr-free objects. The order is also probably not optimal: need to
621 * test cache misses and other graph traversal orders.
623 static char *to_space = NULL;
624 static char *gray_objects = NULL;
625 static char *to_space_end = NULL;
626 static GCMemSection *to_space_section = NULL;
628 /* objects bigger then this go into the large object space */
629 #define MAX_SMALL_OBJ_SIZE 0xffff
632 * ######################################################################
633 * ######## Macros and function declarations.
634 * ######################################################################
637 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
638 if ((mword)(low) < lowest_heap_address) \
639 lowest_heap_address = (mword)(low); \
640 if ((mword)(high) > highest_heap_address) \
641 highest_heap_address = (mword)(high); \
645 align_pointer (void *ptr)
647 mword p = (mword)ptr;
648 p += sizeof (gpointer) - 1;
649 p &= ~ (sizeof (gpointer) - 1);
653 /* forward declarations */
654 static void* get_internal_mem (size_t size);
655 static void free_internal_mem (void *addr);
656 static void* get_os_memory (size_t size, int activate);
657 static void free_os_memory (void *addr, size_t size);
658 static void report_internal_mem_usage (void);
660 static int stop_world (void);
661 static int restart_world (void);
662 static void pin_thread_data (void *start_nursery, void *end_nursery);
663 static void scan_from_remsets (void *start_nursery, void *end_nursery);
664 static void find_pinning_ref_from_thread (char *obj, size_t size);
665 static void update_current_thread_stack (void *start);
666 static GCMemSection* alloc_section (size_t size);
667 static void finalize_in_range (char *start, char *end);
668 static void null_link_in_range (char *start, char *end);
669 static gboolean search_fragment_for_size (size_t size);
670 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
671 static void clear_remsets (void);
672 static void clear_tlabs (void);
673 static char *find_tlab_next_from_address (char *addr);
674 static void sweep_pinned_objects (void);
675 static void free_large_object (LOSObject *obj);
676 static void free_mem_section (GCMemSection *section);
678 void describe_ptr (char *ptr);
679 void check_consistency (void);
680 char* check_object (char *start);
683 * ######################################################################
684 * ######## GC descriptors
685 * ######################################################################
686 * Used to quickly get the info the GC needs about an object: size and
687 * where the references are held.
689 /* objects are aligned to 8 bytes boundaries
690 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
691 * The low 3 bits define the type of the descriptor. The other bits
692 * depend on the type.
693 * As a general rule the 13 remaining low bits define the size, either
694 * of the whole object or of the elements in the arrays. While for objects
695 * the size is already in bytes, for arrays we need to shift, because
696 * array elements might be smaller than 8 bytes. In case of arrays, we
697 * use two bits to describe what the additional high bits represents,
698 * so the default behaviour can handle element sizes less than 2048 bytes.
699 * The high 16 bits, if 0 it means the object is pointer-free.
700 * This design should make it easy and fast to skip over ptr-free data.
701 * The first 4 types should cover >95% of the objects.
702 * Note that since the size of objects is limited to 64K, larger objects
703 * will be allocated in the large object heap.
704 * If we want 4-bytes alignment, we need to put vector and small bitmap
708 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
709 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
710 DESC_TYPE_STRING, /* nothing */
711 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
712 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
713 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
714 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
715 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
716 /* subtypes for arrays and vectors */
717 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
718 DESC_TYPE_V_REFS, /* all the array elements are refs */
719 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
720 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
723 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
724 #define LOW_TYPE_BITS 3
725 #define SMALL_BITMAP_SHIFT 16
726 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
727 #define VECTOR_INFO_SHIFT 14
728 #define VECTOR_ELSIZE_SHIFT 3
729 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
730 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
731 #define SMALL_SIZE_MASK 0xfff8
732 #define MAX_ELEMENT_SIZE 0x3ff
733 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
734 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
735 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
736 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
737 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
739 #define ALLOC_ALIGN 8
742 /* Root bitmap descriptors are simpler: the lower two bits describe the type
743 * and we either have 30/62 bitmap bits or nibble-based run-length,
744 * or a complex descriptor
747 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
750 ROOT_DESC_LARGE_BITMAP,
751 ROOT_DESC_TYPE_MASK = 0x3,
752 ROOT_DESC_TYPE_SHIFT = 2,
755 static gsize* complex_descriptors = NULL;
756 static int complex_descriptors_size = 0;
757 static int complex_descriptors_next = 0;
760 alloc_complex_descriptor (gsize *bitmap, int numbits)
762 int nwords = numbits/GC_BITS_PER_WORD + 2;
767 res = complex_descriptors_next;
768 /* linear search, so we don't have duplicates with domain load/unload
769 * this should not be performance critical or we'd have bigger issues
770 * (the number and size of complex descriptors should be small).
772 for (i = 0; i < complex_descriptors_next; ) {
773 if (complex_descriptors [i] == nwords) {
775 for (j = 0; j < nwords - 1; ++j) {
776 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
786 i += complex_descriptors [i];
788 if (complex_descriptors_next + nwords > complex_descriptors_size) {
789 int new_size = complex_descriptors_size * 2 + nwords;
790 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
791 complex_descriptors_size = new_size;
793 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
794 complex_descriptors_next += nwords;
795 complex_descriptors [res] = nwords;
796 for (i = 0; i < nwords - 1; ++i) {
797 complex_descriptors [res + 1 + i] = bitmap [i];
798 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
805 * Descriptor builders.
808 mono_gc_make_descr_for_string (gsize *bitmap, int numbits)
810 return (void*) DESC_TYPE_STRING;
814 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
816 int first_set = -1, num_set = 0, last_set = -1, i;
818 size_t stored_size = obj_size;
819 stored_size += ALLOC_ALIGN - 1;
820 stored_size &= ~(ALLOC_ALIGN - 1);
821 for (i = 0; i < numbits; ++i) {
822 if (bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
829 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
830 /* check run-length encoding first: one byte offset, one byte number of pointers
831 * on 64 bit archs, we can have 3 runs, just one on 32.
832 * It may be better to use nibbles.
835 desc = DESC_TYPE_RUN_LENGTH | stored_size;
836 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size));
838 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
839 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
840 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
843 /* we know the 2-word header is ptr-free */
844 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
845 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
846 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
850 /* we know the 2-word header is ptr-free */
851 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
852 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
853 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
856 /* it's a complex object ... */
857 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
861 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
863 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
865 int first_set = -1, num_set = 0, last_set = -1, i;
866 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
867 for (i = 0; i < numbits; ++i) {
868 if (elem_bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
875 if (elem_size <= MAX_ELEMENT_SIZE) {
876 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
878 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
880 /* Note: we also handle structs with just ref fields */
881 if (num_set * sizeof (gpointer) == elem_size) {
882 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16));
884 /* FIXME: try run-len first */
885 /* Note: we can't skip the object header here, because it's not present */
886 if (last_set <= SMALL_BITMAP_SIZE) {
887 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
890 /* it's am array of complex structs ... */
891 desc = DESC_TYPE_COMPLEX_ARR;
892 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
896 /* helper macros to scan and traverse objects, macros because we resue them in many functions */
897 #define STRING_SIZE(size,str) do { \
898 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
899 (size) += (ALLOC_ALIGN - 1); \
900 (size) &= ~(ALLOC_ALIGN - 1); \
903 #define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \
904 (size) = (desc) & 0xfff8; \
907 #define OBJ_BITMAP_SIZE(size,desc,obj) do { \
908 (size) = (desc) & 0xfff8; \
911 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
912 #define PREFETCH(addr)
914 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
915 #define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \
916 if ((desc) & 0xffff0000) { \
917 /* there are pointers */ \
918 void **_objptr_end; \
919 void **_objptr = (void**)(obj); \
920 _objptr += ((desc) >> 16) & 0xff; \
921 _objptr_end = _objptr + (((desc) >> 24) & 0xff); \
922 while (_objptr < _objptr_end) { \
923 HANDLE_PTR (_objptr, (obj)); \
929 /* a bitmap desc means that there are pointer references or we'd have
930 * choosen run-length, instead: add an assert to check.
932 #define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \
933 /* there are pointers */ \
934 void **_objptr = (void**)(obj); \
935 gsize _bmap = (desc) >> 16; \
936 _objptr += OBJECT_HEADER_WORDS; \
939 HANDLE_PTR (_objptr, (obj)); \
946 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
947 /* there are pointers */ \
948 void **_objptr = (void**)(obj); \
949 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
950 _objptr += OBJECT_HEADER_WORDS; \
953 HANDLE_PTR (_objptr, (obj)); \
960 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
961 /* there are pointers */ \
962 void **_objptr = (void**)(obj); \
963 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
964 int bwords = (*bitmap_data) - 1; \
965 void **start_run = _objptr; \
968 MonoObject *myobj = (MonoObject*)obj; \
969 g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
971 while (bwords-- > 0) { \
972 gsize _bmap = *bitmap_data++; \
973 _objptr = start_run; \
974 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
977 HANDLE_PTR (_objptr, (obj)); \
982 start_run += GC_BITS_PER_WORD; \
986 /* this one is untested */
987 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
988 /* there are pointers */ \
989 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
990 int mbwords = (*mbitmap_data++) - 1; \
991 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
992 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
993 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
995 MonoObject *myobj = (MonoObject*)start; \
996 g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
998 while (e_start < e_end) { \
999 void **_objptr = (void**)e_start; \
1000 gsize *bitmap_data = mbitmap_data; \
1001 unsigned int bwords = mbwords; \
1002 while (bwords-- > 0) { \
1003 gsize _bmap = *bitmap_data++; \
1004 void **start_run = _objptr; \
1005 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
1007 if ((_bmap & 1)) { \
1008 HANDLE_PTR (_objptr, (obj)); \
1013 _objptr = start_run + GC_BITS_PER_WORD; \
1015 e_start += el_size; \
1019 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
1020 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
1021 if ((vt)->desc & 0xffffc000) { \
1022 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
1023 /* there are pointers */ \
1024 int etype = (vt)->desc & 0xc000; \
1025 if (etype == (DESC_TYPE_V_REFS << 14)) { \
1026 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
1027 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
1028 /* Note: this code can handle also arrays of struct with only references in them */ \
1029 while (p < end_refs) { \
1030 HANDLE_PTR (p, (obj)); \
1033 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
1034 int offset = ((vt)->desc >> 16) & 0xff; \
1035 int num_refs = ((vt)->desc >> 24) & 0xff; \
1036 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1037 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1038 while (e_start < e_end) { \
1039 void **p = (void**)e_start; \
1042 for (i = 0; i < num_refs; ++i) { \
1043 HANDLE_PTR (p + i, (obj)); \
1045 e_start += el_size; \
1047 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
1048 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1049 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1050 while (e_start < e_end) { \
1051 void **p = (void**)e_start; \
1052 gsize _bmap = (vt)->desc >> 16; \
1053 /* Note: there is no object header here to skip */ \
1055 if ((_bmap & 1)) { \
1056 HANDLE_PTR (p, (obj)); \
1061 e_start += el_size; \
1067 static mword new_obj_references = 0;
1068 static mword obj_references_checked = 0;
1071 #define HANDLE_PTR(ptr,obj) do { \
1072 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1073 new_obj_references++; \
1074 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1076 obj_references_checked++; \
1081 * ######################################################################
1082 * ######## Detecting and removing garbage.
1083 * ######################################################################
1084 * This section of code deals with detecting the objects no longer in use
1085 * and reclaiming the memory.
1087 static void __attribute__((noinline))
1088 scan_area (char *start, char *end)
1093 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1095 new_obj_references = 0;
1096 obj_references_checked = 0;
1097 while (start < end) {
1098 if (!*(void**)start) {
1099 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1102 vt = (GCVTable*)LOAD_VTABLE (start);
1103 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1105 MonoObject *obj = (MonoObject*)start;
1106 g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1110 if (type == DESC_TYPE_STRING) {
1111 STRING_SIZE (skip_size, start);
1115 } else if (type == DESC_TYPE_RUN_LENGTH) {
1116 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1117 g_assert (skip_size);
1118 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1122 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1123 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1124 skip_size *= mono_array_length ((MonoArray*)start);
1125 skip_size += sizeof (MonoArray);
1126 skip_size += (ALLOC_ALIGN - 1);
1127 skip_size &= ~(ALLOC_ALIGN - 1);
1128 OBJ_VECTOR_FOREACH_PTR (vt, start);
1129 if (type == DESC_TYPE_ARRAY) {
1130 /* account for the bounds */
1135 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1136 OBJ_BITMAP_SIZE (skip_size, desc, start);
1137 g_assert (skip_size);
1138 OBJ_BITMAP_FOREACH_PTR (desc,start);
1142 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1143 skip_size = safe_object_get_size ((MonoObject*)start);
1144 skip_size += (ALLOC_ALIGN - 1);
1145 skip_size &= ~(ALLOC_ALIGN - 1);
1146 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1150 } else if (type == DESC_TYPE_COMPLEX) {
1151 /* this is a complex object */
1152 skip_size = safe_object_get_size ((MonoObject*)start);
1153 skip_size += (ALLOC_ALIGN - 1);
1154 skip_size &= ~(ALLOC_ALIGN - 1);
1155 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1159 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1160 /* this is an array of complex structs */
1161 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1162 skip_size *= mono_array_length ((MonoArray*)start);
1163 skip_size += sizeof (MonoArray);
1164 skip_size += (ALLOC_ALIGN - 1);
1165 skip_size &= ~(ALLOC_ALIGN - 1);
1166 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1167 if (type == DESC_TYPE_ARRAY) {
1168 /* account for the bounds */
1177 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1178 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1179 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
1182 static void __attribute__((noinline))
1183 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1190 while (start < end) {
1191 if (!*(void**)start) {
1192 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1195 vt = (GCVTable*)LOAD_VTABLE (start);
1196 /* handle threads someway (maybe insert the root domain vtable?) */
1197 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1198 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1205 if (type == DESC_TYPE_STRING) {
1206 STRING_SIZE (skip_size, start);
1207 if (remove) memset (start, 0, skip_size);
1210 } else if (type == DESC_TYPE_RUN_LENGTH) {
1211 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1212 g_assert (skip_size);
1213 if (remove) memset (start, 0, skip_size);
1216 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1217 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1218 skip_size *= mono_array_length ((MonoArray*)start);
1219 skip_size += sizeof (MonoArray);
1220 skip_size += (ALLOC_ALIGN - 1);
1221 skip_size &= ~(ALLOC_ALIGN - 1);
1222 if (type == DESC_TYPE_ARRAY) {
1223 /* account for the bounds */
1225 if (remove) memset (start, 0, skip_size);
1228 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1229 OBJ_BITMAP_SIZE (skip_size, desc, start);
1230 g_assert (skip_size);
1231 if (remove) memset (start, 0, skip_size);
1234 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1235 skip_size = safe_object_get_size ((MonoObject*)start);
1236 skip_size += (ALLOC_ALIGN - 1);
1237 skip_size &= ~(ALLOC_ALIGN - 1);
1238 if (remove) memset (start, 0, skip_size);
1241 } else if (type == DESC_TYPE_COMPLEX) {
1242 /* this is a complex object */
1243 skip_size = safe_object_get_size ((MonoObject*)start);
1244 skip_size += (ALLOC_ALIGN - 1);
1245 skip_size &= ~(ALLOC_ALIGN - 1);
1246 if (remove) memset (start, 0, skip_size);
1249 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1250 /* this is an array of complex structs */
1251 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1252 skip_size *= mono_array_length ((MonoArray*)start);
1253 skip_size += sizeof (MonoArray);
1254 skip_size += (ALLOC_ALIGN - 1);
1255 skip_size &= ~(ALLOC_ALIGN - 1);
1256 if (type == DESC_TYPE_ARRAY) {
1257 /* account for the bounds */
1259 if (remove) memset (start, 0, skip_size);
1269 * When appdomains are unloaded we can easily remove objects that have finalizers,
1270 * but all the others could still be present in random places on the heap.
1271 * We need a sweep to get rid of them even though it's going to be costly
1273 * The reason we need to remove them is because we access the vtable and class
1274 * structures to know the object size and the reference bitmap: once the domain is
1275 * unloaded the point to random memory.
1278 mono_gc_clear_domain (MonoDomain * domain)
1280 GCMemSection *section;
1282 for (section = section_list; section; section = section->next) {
1283 scan_area_for_domain (domain, section->data, section->end_data);
1285 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
1290 add_to_global_remset (gpointer ptr)
1293 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1294 if (global_remset->store_next < global_remset->end_set) {
1295 *(global_remset->store_next++) = (mword)ptr;
1298 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1299 rs->next = global_remset;
1301 *(global_remset->store_next++) = (mword)ptr;
1305 * This is how the copying happens from the nursery to the old generation.
1306 * We assume that at this time all the pinned objects have been identified and
1308 * We run scan_object() for each pinned object so that each referenced
1309 * objects if possible are copied. The new gray objects created can have
1310 * scan_object() run on them right away, too.
1311 * Then we run copy_object() for the precisely tracked roots. At this point
1312 * all the roots are either gray or black. We run scan_object() on the gray
1313 * objects until no more gray objects are created.
1314 * At the end of the process we walk again the pinned list and we unmark
1315 * the pinned flag. As we go we also create the list of free space for use
1316 * in the next allocation runs.
1318 * We need to remember objects from the old generation that point to the new one
1319 * (or just addresses?).
1321 * copy_object could be made into a macro once debugged (use inline for now).
1324 static char* __attribute__((noinline))
1325 copy_object (char *obj, char *from_space_start, char *from_space_end)
1327 if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) {
1331 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1332 if ((forwarded = object_is_forwarded (obj))) {
1333 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1334 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1337 if (object_is_pinned (obj)) {
1338 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1339 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1342 objsize = safe_object_get_size ((MonoObject*)obj);
1343 objsize += ALLOC_ALIGN - 1;
1344 objsize &= ~(ALLOC_ALIGN - 1);
1345 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1346 /* FIXME: handle pinned allocs:
1347 * Large objects are simple, at least until we always follow the rule:
1348 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1349 * At the end of major collections, we walk the los list and if
1350 * the object is pinned, it is marked, otherwise it can be freed.
1352 if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) {
1353 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize));
1357 /* ok, the object is not pinned, we can move it */
1358 /* use a optimized memcpy here */
1363 char* edi = gray_objects;
1364 __asm__ __volatile__(
1366 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1367 : "0" (objsize/4), "1" (edi),"2" (esi)
1372 memcpy (gray_objects, obj, objsize);
1374 /* adjust array->bounds */
1375 vt = ((MonoObject*)obj)->vtable;
1376 g_assert (vt->gc_descr);
1377 if (vt->rank && ((MonoArray*)obj)->bounds) {
1378 MonoArray *array = (MonoArray*)gray_objects;
1379 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1380 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1382 /* set the forwarding pointer */
1383 forward_object (obj, gray_objects);
1385 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1386 gray_objects += objsize;
1387 DEBUG (8, g_assert (gray_objects <= to_space_end));
1394 #define HANDLE_PTR(ptr,obj) do { \
1396 void *__old = *(ptr); \
1397 *(ptr) = copy_object (*(ptr), from_start, from_end); \
1398 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1399 if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end)) \
1400 add_to_global_remset ((ptr)); \
1405 * Scan the object pointed to by @start for references to
1406 * other objects between @from_start and @from_end and copy
1407 * them to the gray_objects area.
1408 * Returns a pointer to the end of the object.
1411 scan_object (char *start, char* from_start, char* from_end)
1417 vt = (GCVTable*)LOAD_VTABLE (start);
1418 //type = vt->desc & 0x7;
1420 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1422 switch (desc & 0x7) {
1423 //if (type == DESC_TYPE_STRING) {
1424 case DESC_TYPE_STRING:
1425 STRING_SIZE (skip_size, start);
1426 return start + skip_size;
1427 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1428 case DESC_TYPE_RUN_LENGTH:
1429 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1430 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1431 g_assert (skip_size);
1432 return start + skip_size;
1433 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1434 case DESC_TYPE_ARRAY:
1435 case DESC_TYPE_VECTOR:
1436 OBJ_VECTOR_FOREACH_PTR (vt, start);
1437 skip_size = safe_object_get_size ((MonoObject*)start);
1439 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1440 skip_size *= mono_array_length ((MonoArray*)start);
1441 skip_size += sizeof (MonoArray);
1443 skip_size += (ALLOC_ALIGN - 1);
1444 skip_size &= ~(ALLOC_ALIGN - 1);
1445 return start + skip_size;
1446 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1447 case DESC_TYPE_SMALL_BITMAP:
1448 OBJ_BITMAP_FOREACH_PTR (desc,start);
1449 OBJ_BITMAP_SIZE (skip_size, desc, start);
1450 return start + skip_size;
1451 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1452 case DESC_TYPE_LARGE_BITMAP:
1453 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1454 skip_size = safe_object_get_size ((MonoObject*)start);
1455 skip_size += (ALLOC_ALIGN - 1);
1456 skip_size &= ~(ALLOC_ALIGN - 1);
1457 return start + skip_size;
1458 //} else if (type == DESC_TYPE_COMPLEX) {
1459 case DESC_TYPE_COMPLEX:
1460 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1461 /* this is a complex object */
1462 skip_size = safe_object_get_size ((MonoObject*)start);
1463 skip_size += (ALLOC_ALIGN - 1);
1464 skip_size &= ~(ALLOC_ALIGN - 1);
1465 return start + skip_size;
1466 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1467 case DESC_TYPE_COMPLEX_ARR:
1468 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1469 /* this is an array of complex structs */
1470 skip_size = safe_object_get_size ((MonoObject*)start);
1472 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1473 skip_size *= mono_array_length ((MonoArray*)start);
1474 skip_size += sizeof (MonoArray);
1476 skip_size += (ALLOC_ALIGN - 1);
1477 skip_size &= ~(ALLOC_ALIGN - 1);
1478 return start + skip_size;
1480 g_assert_not_reached ();
1487 * Scan the valuetype pointed to by START, described by DESC for references to
1488 * other objects between @from_start and @from_end and copy them to the gray_objects area.
1489 * Returns a pointer to the end of the object.
1492 scan_vtype (char *start, mword desc, char* from_start, char* from_end)
1496 /* The descriptors include info about the MonoObject header as well */
1497 start -= sizeof (MonoObject);
1499 switch (desc & 0x7) {
1500 case DESC_TYPE_RUN_LENGTH:
1501 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1502 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1503 g_assert (skip_size);
1504 return start + skip_size;
1505 case DESC_TYPE_SMALL_BITMAP:
1506 OBJ_BITMAP_FOREACH_PTR (desc,start);
1507 OBJ_BITMAP_SIZE (skip_size, desc, start);
1508 return start + skip_size;
1509 case DESC_TYPE_LARGE_BITMAP:
1510 case DESC_TYPE_COMPLEX:
1512 g_assert_not_reached ();
1515 // The other descriptors can't happen with vtypes
1516 g_assert_not_reached ();
1523 * Addresses from start to end are already sorted. This function finds the object header
1524 * for each address and pins the object. The addresses must be inside the passed section.
1525 * Return the number of pinned objects.
1528 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1533 void *last_obj = NULL;
1534 size_t last_obj_size = 0;
1537 void **definitely_pinned = start;
1538 while (start < end) {
1540 /* the range check should be reduntant */
1541 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1542 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1543 /* multiple pointers to the same object */
1544 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
1548 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1549 search_start = (void*)section->scan_starts [idx];
1550 if (!search_start || search_start > addr) {
1553 search_start = section->scan_starts [idx];
1554 if (search_start && search_start <= addr)
1557 if (!search_start || search_start > addr)
1558 search_start = start_nursery;
1560 if (search_start < last_obj)
1561 search_start = (char*)last_obj + last_obj_size;
1562 /* now addr should be in an object a short distance from search_start
1563 * Note that search_start must point to zeroed mem or point to an object.
1566 if (!*(void**)search_start) {
1567 mword p = (mword)search_start;
1568 p += sizeof (gpointer);
1569 p += ALLOC_ALIGN - 1;
1570 p &= ~(ALLOC_ALIGN - 1);
1571 search_start = (void*)p;
1574 last_obj = search_start;
1575 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1576 last_obj_size += ALLOC_ALIGN - 1;
1577 last_obj_size &= ~(ALLOC_ALIGN - 1);
1578 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size));
1579 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1580 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1581 pin_object (search_start);
1582 definitely_pinned [count] = search_start;
1586 /* skip to the next object */
1587 search_start = (void*)((char*)search_start + last_obj_size);
1588 } while (search_start <= addr);
1589 /* we either pinned the correct object or we ignored the addr because
1590 * it points to unused zeroed memory.
1596 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1600 static void** pin_queue;
1601 static int pin_queue_size = 0;
1602 static int next_pin_slot = 0;
1607 gap = (gap * 10) / 13;
1608 if (gap == 9 || gap == 10)
1617 compare_addr (const void *a, const void *b)
1619 return *(const void **)a - *(const void **)b;
1623 /* sort the addresses in array in increasing order */
1625 sort_addresses (void **array, int size)
1628 * qsort is slower as predicted.
1629 * qsort (array, size, sizeof (gpointer), compare_addr);
1636 gap = new_gap (gap);
1639 for (i = 0; i < end; i++) {
1641 if (array [i] > array [j]) {
1642 void* val = array [i];
1643 array [i] = array [j];
1648 if (gap == 1 && !swapped)
1654 print_nursery_gaps (void* start_nursery, void *end_nursery)
1657 gpointer first = start_nursery;
1659 for (i = 0; i < next_pin_slot; ++i) {
1660 next = pin_queue [i];
1661 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1665 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1668 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
1670 optimize_pin_queue (int start_slot)
1672 void **start, **cur, **end;
1673 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1674 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1675 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1676 if ((next_pin_slot - start_slot) > 1)
1677 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1678 start = cur = pin_queue + start_slot;
1679 end = pin_queue + next_pin_slot;
1682 while (*start == *cur && cur < end)
1686 next_pin_slot = start - pin_queue;
1687 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1688 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
1693 realloc_pin_queue (void)
1695 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1696 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1697 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1698 free_internal_mem (pin_queue);
1699 pin_queue = new_pin;
1700 pin_queue_size = new_size;
1701 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1705 * Scan the memory between start and end and queue values which could be pointers
1706 * to the area between start_nursery and end_nursery for later consideration.
1707 * Typically used for thread stacks.
1710 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1713 while (start < end) {
1714 if (*start >= start_nursery && *start < end_nursery) {
1716 * *start can point to the middle of an object
1717 * note: should we handle pointing at the end of an object?
1718 * pinning in C# code disallows pointing at the end of an object
1719 * but there is some small chance that an optimizing C compiler
1720 * may keep the only reference to an object by pointing
1721 * at the end of it. We ignore this small chance for now.
1722 * Pointers to the end of an object are indistinguishable
1723 * from pointers to the start of the next object in memory
1724 * so if we allow that we'd need to pin two objects...
1725 * We queue the pointer in an array, the
1726 * array will then be sorted and uniqued. This way
1727 * we can coalesce several pinning pointers and it should
1728 * be faster since we'd do a memory scan with increasing
1729 * addresses. Note: we can align the address to the allocation
1730 * alignment, so the unique process is more effective.
1732 mword addr = (mword)*start;
1733 addr &= ~(ALLOC_ALIGN - 1);
1734 if (next_pin_slot >= pin_queue_size)
1735 realloc_pin_queue ();
1736 pin_queue [next_pin_slot++] = (void*)addr;
1737 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1742 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1744 #ifdef HAVE_VALGRIND_MEMCHECK_H
1746 * The pinning addresses might come from undefined memory, this is normal. Since they
1747 * are used in lots of functions, we make the memory defined here instead of having
1748 * to add a supression for those functions.
1750 VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0]));
1755 * If generation is 0, just mark objects in the nursery, the others we don't care,
1756 * since they are not going to move anyway.
1757 * There are different areas that are scanned for pinned pointers:
1758 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1759 * *) the pinned handle table
1760 * *) the pinned roots
1762 * Note: when we'll use a write barrier for old to new gen references, we need to
1763 * keep track of old gen objects that point to pinned new gen objects because in that
1764 * case the referenced object will be moved maybe at the next collection, but there
1765 * is no write in the old generation area where the pinned object is referenced
1766 * and we may not consider it as reachable.
1769 mark_pinned_objects (int generation)
1774 * Debugging function: find in the conservative roots where @obj is being pinned.
1777 find_pinning_reference (char *obj, size_t size)
1781 char *endobj = obj + size;
1782 for (i = 0; i < roots_hash_size; ++i) {
1783 for (root = roots_hash [i]; root; root = root->next) {
1784 /* if desc is non-null it has precise info */
1785 if (!root->root_desc) {
1786 char ** start = (char**)root->start_root;
1787 while (start < (char**)root->end_root) {
1788 if (*start >= obj && *start < endobj) {
1789 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1796 find_pinning_ref_from_thread (obj, size);
1800 * The first thing we do in a collection is to identify pinned objects.
1801 * This function considers all the areas of memory that need to be
1802 * conservatively scanned.
1805 pin_from_roots (void *start_nursery, void *end_nursery)
1809 DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries));
1810 /* objects pinned from the API are inside these roots */
1811 for (i = 0; i < roots_hash_size; ++i) {
1812 for (root = roots_hash [i]; root; root = root->next) {
1813 /* if desc is non-null it has precise info */
1814 if (root->root_desc)
1816 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1817 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1820 /* now deal with the thread stacks
1821 * in the future we should be able to conservatively scan only:
1822 * *) the cpu registers
1823 * *) the unmanaged stack frames
1824 * *) the _last_ managed stack frame
1825 * *) pointers slots in managed frames
1827 pin_thread_data (start_nursery, end_nursery);
1831 * The memory area from start_root to end_root contains pointers to objects.
1832 * Their position is precisely described by @desc (this means that the pointer
1833 * can be either NULL or the pointer to the start of an object).
1834 * This functions copies them to to_space updates them.
1837 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1839 switch (desc & ROOT_DESC_TYPE_MASK) {
1840 case ROOT_DESC_BITMAP:
1841 desc >>= ROOT_DESC_TYPE_SHIFT;
1843 if ((desc & 1) && *start_root) {
1844 *start_root = copy_object (*start_root, n_start, n_end);
1845 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \
1851 case ROOT_DESC_RUN_LEN:
1852 case ROOT_DESC_LARGE_BITMAP:
1853 g_assert_not_reached ();
1858 alloc_fragment (void)
1860 Fragment *frag = fragment_freelist;
1862 fragment_freelist = frag->next;
1866 frag = get_internal_mem (sizeof (Fragment));
1872 * Allocate and setup the data structures needed to be able to allocate objects
1873 * in the nursery. The nursery is stored in nursery_section.
1876 alloc_nursery (void)
1878 GCMemSection *section;
1883 if (nursery_section)
1885 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size));
1886 /* later we will alloc a larger area for the nursery but only activate
1887 * what we need. The rest will be used as expansion if we have too many pinned
1888 * objects in the existing nursery.
1890 /* FIXME: handle OOM */
1891 section = get_internal_mem (sizeof (GCMemSection));
1892 data = get_os_memory (nursery_size, TRUE);
1893 nursery_start = nursery_next = data;
1894 nursery_real_end = data + nursery_size;
1895 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
1896 total_alloc += nursery_size;
1897 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc));
1898 section->data = section->next_data = data;
1899 section->size = nursery_size;
1900 section->end_data = nursery_real_end;
1901 scan_starts = nursery_size / SCAN_START_SIZE;
1902 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
1903 section->num_scan_start = scan_starts;
1904 section->role = MEMORY_ROLE_GEN0;
1906 /* add to the section list */
1907 section->next = section_list;
1908 section_list = section;
1910 nursery_section = section;
1912 /* Setup the single first large fragment */
1913 frag = alloc_fragment ();
1914 frag->fragment_start = nursery_start;
1915 frag->fragment_limit = nursery_start;
1916 frag->fragment_end = nursery_real_end;
1917 nursery_frag_real_end = nursery_real_end;
1918 /* FIXME: frag here is lost */
1922 * Update roots in the old generation. Since we currently don't have the
1923 * info from the write barriers, we just scan all the objects.
1926 scan_old_generation (char *start, char* end)
1928 GCMemSection *section;
1930 LOSObject *big_object;
1933 for (section = section_list; section; section = section->next) {
1934 if (section == nursery_section)
1936 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
1937 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
1939 while (p < section->next_data) {
1944 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
1945 p = scan_object (p, start, end);
1948 /* scan the old object space, too */
1949 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1950 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
1951 scan_object (big_object->data, start, end);
1953 /* scan the list of objects ready for finalization */
1954 for (fin = fin_ready_list; fin; fin = fin->next) {
1955 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
1956 fin->object = copy_object (fin->object, start, end);
1960 static mword fragment_total = 0;
1962 * We found a fragment of free memory in the nursery: memzero it and if
1963 * it is big enough, add it to the list of fragments that can be used for
1967 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
1970 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
1971 /* memsetting just the first chunk start is bound to provide better cache locality */
1972 if (nursery_clear_policy == CLEAR_AT_GC)
1973 memset (frag_start, 0, frag_size);
1974 /* Not worth dealing with smaller fragments: need to tune */
1975 if (frag_size >= FRAGMENT_MIN_SIZE) {
1976 fragment = alloc_fragment ();
1977 fragment->fragment_start = frag_start;
1978 fragment->fragment_limit = frag_start;
1979 fragment->fragment_end = frag_end;
1980 fragment->next = nursery_fragments;
1981 nursery_fragments = fragment;
1982 fragment_total += frag_size;
1984 /* Clear unused fragments, pinning depends on this */
1985 memset (frag_start, 0, frag_size);
1990 scan_needed_big_objects (char *start_addr, char *end_addr)
1992 LOSObject *big_object;
1994 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1995 if (!big_object->scanned && object_is_pinned (big_object->data)) {
1996 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
1997 scan_object (big_object->data, start_addr, end_addr);
1998 big_object->scanned = TRUE;
2006 drain_gray_stack (char *start_addr, char *end_addr)
2010 int fin_ready, bigo_scanned_num;
2014 * We copied all the reachable objects. Now it's the time to copy
2015 * the objects that were not referenced by the roots, but by the copied objects.
2016 * we built a stack of objects pointed to by gray_start: they are
2017 * additional roots and we may add more items as we go.
2018 * We loop until gray_start == gray_objects which means no more objects have
2019 * been added. Note this is iterative: no recursion is involved.
2020 * We need to walk the LO list as well in search of marked big objects
2021 * (use a flag since this is needed only on major collections). We need to loop
2022 * here as well, so keep a counter of marked LO (increasing it in copy_object).
2025 gray_start = to_space;
2026 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2027 while (gray_start < gray_objects) {
2028 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2029 gray_start = scan_object (gray_start, start_addr, end_addr);
2032 DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2033 //scan_old_generation (start_addr, end_addr);
2034 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
2035 /* walk the finalization queue and move also the objects that need to be
2036 * finalized: use the finalized objects as new roots so the objects they depend
2037 * on are also not reclaimed. As with the roots above, only objects in the nursery
2038 * are marked/copied.
2039 * We need a loop here, since objects ready for finalizers may reference other objects
2040 * that are fin-ready. Speedup with a flag?
2043 fin_ready = num_ready_finalizers;
2044 finalize_in_range (start_addr, end_addr);
2045 bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr);
2047 /* drain the new stack that might have been created */
2048 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2049 while (gray_start < gray_objects) {
2050 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2051 gray_start = scan_object (gray_start, start_addr, end_addr);
2053 } while (fin_ready != num_ready_finalizers || bigo_scanned_num);
2055 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
2056 to_space = gray_start;
2057 to_space_section->next_data = to_space;
2060 * handle disappearing links
2061 * Note we do this after checking the finalization queue because if an object
2062 * survives (at least long enough to be finalized) we don't clear the link.
2063 * This also deals with a possible issue with the monitor reclamation: with the Boehm
2064 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
2067 null_link_in_range (start_addr, end_addr);
2069 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2072 static int last_num_pinned = 0;
2075 build_nursery_fragments (int start_pin, int end_pin)
2077 char *frag_start, *frag_end;
2081 /* FIXME: handle non-NULL fragment_freelist */
2082 fragment_freelist = nursery_fragments;
2083 nursery_fragments = NULL;
2084 frag_start = nursery_start;
2086 /* clear scan starts */
2087 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
2088 for (i = start_pin; i < end_pin; ++i) {
2089 frag_end = pin_queue [i];
2090 /* remove the pin bit from pinned objects */
2091 unpin_object (frag_end);
2092 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
2093 frag_size = frag_end - frag_start;
2095 add_nursery_frag (frag_size, frag_start, frag_end);
2096 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2097 frag_size += ALLOC_ALIGN - 1;
2098 frag_size &= ~(ALLOC_ALIGN - 1);
2099 frag_start = (char*)pin_queue [i] + frag_size;
2101 * pin_queue [i] might point to a half-constructed string or vector whose
2102 * length field is not set. In that case, frag_start points inside the
2103 * (zero initialized) object. Find the end of the object by scanning forward.
2106 if (is_maybe_half_constructed (pin_queue [i])) {
2109 /* This is also hit for zero length arrays/strings */
2111 /* Find the end of the TLAB which contained this allocation */
2112 tlab_end = find_tlab_next_from_address (pin_queue [i]);
2115 while ((frag_start < tlab_end) && *(mword*)frag_start == 0)
2116 frag_start += sizeof (mword);
2119 * FIXME: The object is either not allocated in a TLAB, or it isn't a
2120 * half constructed object.
2125 nursery_last_pinned_end = frag_start;
2126 frag_end = nursery_real_end;
2127 frag_size = frag_end - frag_start;
2129 add_nursery_frag (frag_size, frag_start, frag_end);
2130 if (!nursery_fragments) {
2131 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
2132 for (i = start_pin; i < end_pin; ++i) {
2133 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2138 nursery_next = nursery_frag_real_end = NULL;
2140 /* Clear TLABs for all threads */
2144 /* FIXME: later reduce code duplication here with the above
2145 * We don't keep track of section fragments for non-nursery sections yet, so
2149 build_section_fragments (GCMemSection *section)
2152 char *frag_start, *frag_end;
2155 /* clear scan starts */
2156 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
2157 frag_start = section->data;
2158 section->next_data = section->data;
2159 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
2160 frag_end = pin_queue [i];
2161 /* remove the pin bit from pinned objects */
2162 unpin_object (frag_end);
2163 if (frag_end >= section->data + section->size) {
2164 frag_end = section->data + section->size;
2166 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
2168 frag_size = frag_end - frag_start;
2170 memset (frag_start, 0, frag_size);
2171 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2172 frag_size += ALLOC_ALIGN - 1;
2173 frag_size &= ~(ALLOC_ALIGN - 1);
2174 frag_start = (char*)pin_queue [i] + frag_size;
2175 section->next_data = MAX (section->next_data, frag_start);
2177 frag_end = section->end_data;
2178 frag_size = frag_end - frag_start;
2180 memset (frag_start, 0, frag_size);
2184 scan_from_registered_roots (char *addr_start, char *addr_end)
2188 for (i = 0; i < roots_hash_size; ++i) {
2189 for (root = roots_hash [i]; root; root = root->next) {
2190 /* if desc is non-null it has precise info */
2191 if (!root->root_desc)
2193 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2194 precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc);
2200 * Collect objects in the nursery.
2203 collect_nursery (size_t requested_size)
2205 GCMemSection *section;
2206 size_t max_garbage_amount;
2208 char *orig_nursery_next;
2210 TV_DECLARE (all_atv);
2211 TV_DECLARE (all_btv);
2216 orig_nursery_next = nursery_next;
2217 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2218 /* FIXME: optimize later to use the higher address where an object can be present */
2219 nursery_next = MAX (nursery_next, nursery_real_end);
2221 if (consistency_check_at_minor_collection)
2222 check_consistency ();
2224 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
2225 max_garbage_amount = nursery_next - nursery_start;
2227 /* Clear all remaining nursery fragments, pinning depends on this */
2228 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2229 g_assert (orig_nursery_next <= nursery_frag_real_end);
2230 memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next);
2231 for (frag = nursery_fragments; frag; frag = frag->next) {
2232 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2237 * not enough room in the old generation to store all the possible data from
2238 * the nursery in a single continuous space.
2239 * We reset to_space if we allocated objects in degraded mode.
2241 if (to_space_section)
2242 to_space = gray_objects = to_space_section->next_data;
2243 if ((to_space_end - to_space) < max_garbage_amount) {
2244 section = alloc_section (nursery_section->size * 4);
2245 g_assert (nursery_section->size >= max_garbage_amount);
2246 to_space = gray_objects = section->next_data;
2247 to_space_end = section->end_data;
2248 to_space_section = section;
2250 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2251 nursery_section->next_data = nursery_next;
2254 mono_stats.minor_gc_count ++;
2255 /* world must be stopped already */
2256 TV_GETTIME (all_atv);
2258 /* pin from pinned handles */
2259 pin_from_roots (nursery_start, nursery_next);
2260 /* identify pinned objects */
2261 optimize_pin_queue (0);
2262 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2264 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2265 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2268 * walk all the roots and copy the young objects to the old generation,
2269 * starting from to_space
2272 scan_from_remsets (nursery_start, nursery_next);
2273 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2275 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2276 /* FIXME: later scan also alloc_pinned objects */
2278 /* the pinned objects are roots */
2279 for (i = 0; i < next_pin_slot; ++i) {
2280 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2281 scan_object (pin_queue [i], nursery_start, nursery_next);
2283 /* registered roots, this includes static fields */
2284 scan_from_registered_roots (nursery_start, nursery_next);
2286 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2288 drain_gray_stack (nursery_start, nursery_next);
2290 /* walk the pin_queue, build up the fragment list of free memory, unmark
2291 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2294 build_nursery_fragments (0, next_pin_slot);
2296 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2298 TV_GETTIME (all_btv);
2299 mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2301 /* prepare the pin queue for the next collection */
2302 last_num_pinned = next_pin_slot;
2304 if (fin_ready_list) {
2305 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2306 mono_gc_finalize_notify ();
2311 major_collection (void)
2313 GCMemSection *section, *prev_section;
2314 LOSObject *bigobj, *prevbo;
2320 TV_DECLARE (all_atv);
2321 TV_DECLARE (all_btv);
2324 /* FIXME: only use these values for the precise scan
2325 * note that to_space pointers should be excluded anyway...
2327 char *heap_start = NULL;
2328 char *heap_end = (char*)-1;
2329 size_t copy_space_required = 0;
2332 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2334 mono_stats.major_gc_count ++;
2336 /* Clear all remaining nursery fragments, pinning depends on this */
2337 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2338 g_assert (nursery_next <= nursery_frag_real_end);
2339 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
2340 for (frag = nursery_fragments; frag; frag = frag->next) {
2341 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2346 * FIXME: implement Mark/Compact
2347 * Until that is done, we can just apply mostly the same alg as for the nursery:
2348 * this means we need a big section to potentially copy all the other sections, so
2349 * it is not ideal specially with large heaps.
2351 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2352 collect_nursery (0);
2355 TV_GETTIME (all_atv);
2356 /* FIXME: make sure the nursery next_data ptr is updated */
2357 nursery_section->next_data = nursery_real_end;
2358 /* we should also coalesce scanning from sections close to each other
2359 * and deal with pointers outside of the sections later.
2361 /* The remsets are not useful for a major collection */
2363 /* world must be stopped already */
2365 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2366 for (section = section_list; section; section = section->next) {
2367 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2368 pin_from_roots (section->data, section->next_data);
2369 if (count != next_pin_slot) {
2371 optimize_pin_queue (count);
2372 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2373 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2374 section->pin_queue_end = next_pin_slot = count + reduced_to;
2376 copy_space_required += (char*)section->next_data - (char*)section->data;
2378 /* identify possible pointers to the insize of large objects */
2379 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2380 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2381 count = next_pin_slot;
2382 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2383 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2384 if (next_pin_slot != count) {
2385 next_pin_slot = count;
2386 pin_object (bigobj->data);
2387 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2390 /* look for pinned addresses for pinned-alloc objects */
2391 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2392 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2393 count = next_pin_slot;
2394 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2395 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2396 if (next_pin_slot != count) {
2397 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2398 next_pin_slot = count;
2403 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2404 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2406 /* allocate the big to space */
2407 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required));
2408 section = alloc_section (copy_space_required);
2409 to_space = gray_objects = section->next_data;
2410 to_space_end = section->end_data;
2411 to_space_section = section;
2413 /* the old generation doesn't need to be scanned (no remembered sets or card
2414 * table needed either): the only objects that must survive are those pinned and
2415 * those referenced by the precise roots.
2416 * mark any section without pinned objects, so we can free it since we will be able to
2417 * move all the objects.
2419 /* the pinned objects are roots (big objects are included in this list, too) */
2420 for (i = 0; i < next_pin_slot; ++i) {
2421 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2422 scan_object (pin_queue [i], heap_start, heap_end);
2424 /* registered roots, this includes static fields */
2425 scan_from_registered_roots (heap_start, heap_end);
2427 /* scan the list of objects ready for finalization */
2428 for (fin = fin_ready_list; fin; fin = fin->next) {
2429 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2430 fin->object = copy_object (fin->object, heap_start, heap_end);
2433 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2435 /* we need to go over the big object list to see if any was marked and scan it
2436 * And we need to make this in a loop, considering that objects referenced by finalizable
2437 * objects could reference big objects (this happens in drain_gray_stack ())
2439 scan_needed_big_objects (heap_start, heap_end);
2440 /* all the objects in the heap */
2441 drain_gray_stack (heap_start, heap_end);
2443 /* sweep the big objects list */
2445 for (bigobj = los_object_list; bigobj;) {
2446 if (object_is_pinned (bigobj->data)) {
2447 unpin_object (bigobj->data);
2448 bigobj->scanned = FALSE;
2451 /* not referenced anywhere, so we can free it */
2453 prevbo->next = bigobj->next;
2455 los_object_list = bigobj->next;
2457 bigobj = bigobj->next;
2458 free_large_object (to_free);
2462 bigobj = bigobj->next;
2464 /* unpin objects from the pinned chunks and free the unmarked ones */
2465 sweep_pinned_objects ();
2467 /* free the unused sections */
2468 prev_section = NULL;
2469 for (section = section_list; section;) {
2470 /* to_space doesn't need handling here and the nursery is special */
2471 if (section == to_space_section || section == nursery_section) {
2472 prev_section = section;
2473 section = section->next;
2476 /* no pinning object, so the section is free */
2477 if (section->pin_queue_start == section->pin_queue_end) {
2478 GCMemSection *to_free;
2480 prev_section->next = section->next;
2482 section_list = section->next;
2484 section = section->next;
2485 free_mem_section (to_free);
2488 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2489 build_section_fragments (section);
2491 prev_section = section;
2492 section = section->next;
2495 /* walk the pin_queue, build up the fragment list of free memory, unmark
2496 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2499 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2501 TV_GETTIME (all_btv);
2502 mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2503 /* prepare the pin queue for the next collection */
2505 if (fin_ready_list) {
2506 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2507 mono_gc_finalize_notify ();
2512 * Allocate a new section of memory to be used as old generation.
2514 static GCMemSection*
2515 alloc_section (size_t size)
2517 GCMemSection *section;
2520 size_t new_size = next_section_size;
2522 if (size > next_section_size) {
2524 new_size += pagesize - 1;
2525 new_size &= ~(pagesize - 1);
2527 section_size_used++;
2528 if (section_size_used > 3) {
2529 section_size_used = 0;
2530 next_section_size *= 2;
2531 if (next_section_size > max_section_size)
2532 next_section_size = max_section_size;
2534 section = get_internal_mem (sizeof (GCMemSection));
2535 data = get_os_memory (new_size, TRUE);
2536 section->data = section->next_data = data;
2537 section->size = new_size;
2538 section->end_data = data + new_size;
2539 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2540 total_alloc += new_size;
2541 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc));
2542 section->data = data;
2543 section->size = new_size;
2544 scan_starts = new_size / SCAN_START_SIZE;
2545 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2546 section->num_scan_start = scan_starts;
2547 section->role = MEMORY_ROLE_GEN1;
2549 /* add to the section list */
2550 section->next = section_list;
2551 section_list = section;
2557 free_mem_section (GCMemSection *section)
2559 char *data = section->data;
2560 size_t size = section->size;
2561 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size));
2562 free_os_memory (data, size);
2563 free_internal_mem (section);
2564 total_alloc -= size;
2568 * When deciding if it's better to collect or to expand, keep track
2569 * of how much garbage was reclaimed with the last collection: if it's too
2571 * This is called when we could not allocate a small object.
2573 static void __attribute__((noinline))
2574 minor_collect_or_expand_inner (size_t size)
2576 int do_minor_collection = 1;
2578 if (!nursery_section) {
2582 if (do_minor_collection) {
2584 collect_nursery (size);
2585 DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage));
2587 /* this also sets the proper pointers for the next allocation */
2588 if (!search_fragment_for_size (size)) {
2590 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2591 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned));
2592 for (i = 0; i < last_num_pinned; ++i) {
2593 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2598 //report_internal_mem_usage ();
2602 * ######################################################################
2603 * ######## Memory allocation from the OS
2604 * ######################################################################
2605 * This section of code deals with getting memory from the OS and
2606 * allocating memory for GC-internal data structures.
2607 * Internal memory can be handled with a freelist for small objects.
2611 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2612 * This must not require any lock.
2615 get_os_memory (size_t size, int activate)
2618 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2620 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
2621 size += pagesize - 1;
2622 size &= ~(pagesize - 1);
2623 ptr = mono_valloc (0, size, prot_flags);
2628 * Free the memory returned by get_os_memory (), returning it to the OS.
2631 free_os_memory (void *addr, size_t size)
2633 munmap (addr, size);
2640 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2642 int i, free_pages, num_free, free_mem;
2644 for (i = 0; i < chunk->num_pages; ++i) {
2645 if (!chunk->page_sizes [i])
2648 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2649 free_mem = FREELIST_PAGESIZE * free_pages;
2650 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2651 if (!chunk->free_list [i])
2654 p = chunk->free_list [i];
2659 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2660 free_mem += freelist_sizes [i] * num_free;
2662 printf ("\tfree memory in chunk: %d\n", free_mem);
2669 report_internal_mem_usage (void) {
2672 printf ("Internal memory usage:\n");
2674 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2675 report_pinned_chunk (chunk, i++);
2677 printf ("Pinned memory usage:\n");
2679 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2680 report_pinned_chunk (chunk, i++);
2685 * the array of pointers from @start to @end contains conservative
2686 * pointers to objects inside @chunk: mark each referenced object
2690 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2692 for (; start < end; start++) {
2693 char *addr = *start;
2694 int offset = (char*)addr - (char*)chunk;
2695 int page = offset / FREELIST_PAGESIZE;
2696 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2697 int slot_size = chunk->page_sizes [page];
2699 /* the page is not allocated */
2702 /* would be faster if we restrict the sizes to power of two,
2703 * but that's a waste of memory: need to measure. it could reduce
2704 * fragmentation since there are less pages needed, if for example
2705 * someone interns strings of each size we end up with one page per
2706 * interned string (still this is just ~40 KB): with more fine-grained sizes
2707 * this increases the number of used pages.
2710 obj_offset /= slot_size;
2711 obj_offset *= slot_size;
2712 addr = (char*)chunk->start_data + obj_offset;
2714 obj_offset /= slot_size;
2715 obj_offset *= slot_size;
2716 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2719 /* if the vtable is inside the chunk it's on the freelist, so skip */
2720 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2722 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
2728 sweep_pinned_objects (void)
2735 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2736 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2737 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (ranhe: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2738 for (i = 0; i < chunk->num_pages; ++i) {
2739 obj_size = chunk->page_sizes [i];
2742 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2743 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2744 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2745 while (p + obj_size <= endp) {
2747 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2748 /* if the first word (the vtable) is outside the chunk we have an object */
2749 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2750 if (object_is_pinned (ptr)) {
2752 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2754 /* FIXME: add to freelist */
2755 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2765 * Find the slot number in the freelist for memory chunks that
2766 * can contain @size objects.
2769 slot_for_size (size_t size)
2772 /* do a binary search or lookup table later. */
2773 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2774 if (freelist_sizes [slot] >= size)
2777 g_assert_not_reached ();
2782 * Build a free list for @size memory chunks from the memory area between
2783 * start_page and end_page.
2786 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
2790 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
2791 p = (void**)start_page;
2792 end = (void**)(end_page - size);
2793 g_assert (!chunk->free_list [slot]);
2794 chunk->free_list [slot] = p;
2795 while ((char*)p + size <= (char*)end) {
2797 *p = (void*)((char*)p + size);
2801 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
2805 alloc_pinned_chunk (size_t size)
2810 size += pagesize; /* at least one page */
2811 size += pagesize - 1;
2812 size &= ~(pagesize - 1);
2813 if (size < PINNED_CHUNK_MIN_SIZE * 2)
2814 size = PINNED_CHUNK_MIN_SIZE * 2;
2815 chunk = get_os_memory (size, TRUE);
2816 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
2817 total_alloc += size;
2819 /* setup the bookeeping fields */
2820 chunk->num_pages = size / FREELIST_PAGESIZE;
2821 offset = G_STRUCT_OFFSET (PinnedChunk, data);
2822 chunk->page_sizes = (void*)((char*)chunk + offset);
2823 offset += sizeof (int) * chunk->num_pages;
2824 offset += ALLOC_ALIGN - 1;
2825 offset &= ~(ALLOC_ALIGN - 1);
2826 chunk->free_list = (void*)((char*)chunk + offset);
2827 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
2828 offset += ALLOC_ALIGN - 1;
2829 offset &= ~(ALLOC_ALIGN - 1);
2830 chunk->start_data = (void*)((char*)chunk + offset);
2832 /* allocate the first page to the freelist */
2833 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
2834 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
2835 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size));
2836 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
2837 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
2841 /* assumes freelist for slot is empty, so try to alloc a new page */
2843 get_chunk_freelist (PinnedChunk *chunk, int slot)
2847 p = chunk->free_list [slot];
2849 chunk->free_list [slot] = *p;
2852 for (i = 0; i < chunk->num_pages; ++i) {
2854 if (chunk->page_sizes [i])
2856 size = freelist_sizes [slot];
2857 chunk->page_sizes [i] = size;
2858 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
2862 p = chunk->free_list [slot];
2864 chunk->free_list [slot] = *p;
2871 alloc_from_freelist (size_t size)
2875 PinnedChunk *pchunk;
2876 slot = slot_for_size (size);
2877 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
2878 g_assert (size <= freelist_sizes [slot]);
2879 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2880 void **p = pchunk->free_list [slot];
2882 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
2883 pchunk->free_list [slot] = *p;
2887 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2888 res = get_chunk_freelist (pchunk, slot);
2892 pchunk = alloc_pinned_chunk (size);
2893 /* FIXME: handle OOM */
2894 pchunk->next = pinned_chunk_list;
2895 pinned_chunk_list = pchunk;
2896 res = get_chunk_freelist (pchunk, slot);
2900 /* used for the GC-internal data structures */
2901 /* FIXME: add support for bigger sizes by allocating more than one page
2905 get_internal_mem (size_t size)
2907 return calloc (1, size);
2911 PinnedChunk *pchunk;
2912 slot = slot_for_size (size);
2913 g_assert (size <= freelist_sizes [slot]);
2914 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2915 void **p = pchunk->free_list [slot];
2917 pchunk->free_list [slot] = *p;
2921 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2922 res = get_chunk_freelist (pchunk, slot);
2926 pchunk = alloc_pinned_chunk (size);
2927 /* FIXME: handle OOM */
2928 pchunk->next = internal_chunk_list;
2929 internal_chunk_list = pchunk;
2930 res = get_chunk_freelist (pchunk, slot);
2936 free_internal_mem (void *addr)
2940 PinnedChunk *pchunk;
2941 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2942 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
2943 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
2944 int offset = (char*)addr - (char*)pchunk;
2945 int page = offset / FREELIST_PAGESIZE;
2946 int slot = slot_for_size (pchunk->page_sizes [page]);
2948 *p = pchunk->free_list [slot];
2949 pchunk->free_list [slot] = p;
2953 printf ("free of %p failed\n", addr);
2954 g_assert_not_reached ();
2959 * ######################################################################
2960 * ######## Object allocation
2961 * ######################################################################
2962 * This section of code deals with allocating memory for objects.
2963 * There are several ways:
2964 * *) allocate large objects
2965 * *) allocate normal objects
2966 * *) fast lock-free allocation
2967 * *) allocation of pinned objects
2971 free_large_object (LOSObject *obj)
2973 size_t size = obj->size;
2974 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size));
2976 los_memory_usage -= size;
2977 size += sizeof (LOSObject);
2978 size += pagesize - 1;
2979 size &= ~(pagesize - 1);
2980 total_alloc -= size;
2982 free_os_memory (obj, size);
2986 * Objects with size >= 64KB are allocated in the large object space.
2987 * They are currently kept track of with a linked list.
2988 * They don't move, so there is no need to pin them during collection
2989 * and we avoid the memcpy overhead.
2991 static void* __attribute__((noinline))
2992 alloc_large_inner (MonoVTable *vtable, size_t size)
2997 int just_did_major_gc = FALSE;
2999 if (los_memory_usage > next_los_collection) {
3000 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection));
3001 just_did_major_gc = TRUE;
3003 major_collection ();
3005 /* later increase based on a percent of the heap size */
3006 next_los_collection = los_memory_usage + 5*1024*1024;
3009 alloc_size += sizeof (LOSObject);
3010 alloc_size += pagesize - 1;
3011 alloc_size &= ~(pagesize - 1);
3012 /* FIXME: handle OOM */
3013 obj = get_os_memory (alloc_size, TRUE);
3015 vtslot = (void**)obj->data;
3017 total_alloc += alloc_size;
3018 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
3019 obj->next = los_object_list;
3020 los_object_list = obj;
3021 los_memory_usage += size;
3023 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size));
3027 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
3028 * an object of size @size
3029 * Return FALSE if not found (which means we need a collection)
3032 search_fragment_for_size (size_t size)
3034 Fragment *frag, *prev;
3035 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size));
3037 if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3038 /* Clear the remaining space, pinning depends on this */
3039 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
3042 for (frag = nursery_fragments; frag; frag = frag->next) {
3043 if (size <= (frag->fragment_end - frag->fragment_start)) {
3044 /* remove from the list */
3046 prev->next = frag->next;
3048 nursery_fragments = frag->next;
3049 nursery_next = frag->fragment_start;
3050 nursery_frag_real_end = frag->fragment_end;
3052 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
3053 frag->next = fragment_freelist;
3054 fragment_freelist = frag;
3063 * size is already rounded up and we hold the GC lock.
3066 alloc_degraded (MonoVTable *vtable, size_t size)
3068 GCMemSection *section;
3070 for (section = section_list; section; section = section->next) {
3071 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
3072 p = (void**)section->next_data;
3077 section = alloc_section (nursery_section->size * 4);
3078 /* FIXME: handle OOM */
3079 p = (void**)section->next_data;
3081 section->next_data += size;
3082 degraded_mode += size;
3083 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
3089 * Provide a variant that takes just the vtable for small fixed-size objects.
3090 * The aligned size is already computed and stored in vt->gc_descr.
3091 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
3092 * processing. We can keep track of where objects start, for example,
3093 * so when we scan the thread stacks for pinned objects, we can start
3094 * a search for the pinned object in SCAN_START_SIZE chunks.
3097 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
3099 /* FIXME: handle OOM */
3104 size += ALLOC_ALIGN - 1;
3105 size &= ~(ALLOC_ALIGN - 1);
3107 g_assert (vtable->gc_descr);
3109 if (G_UNLIKELY (collect_before_allocs)) {
3112 if (nursery_section) {
3115 update_current_thread_stack (&dummy);
3117 collect_nursery (0);
3119 if (!degraded_mode && !search_fragment_for_size (size)) {
3121 g_assert_not_reached ();
3127 /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */
3129 p = (void**)tlab_next;
3130 /* FIXME: handle overflow */
3131 new_next = (char*)p + size;
3132 tlab_next = new_next;
3134 if (G_LIKELY (new_next < tlab_temp_end)) {
3138 * FIXME: We might need a memory barrier here so the change to tlab_next is
3139 * visible before the vtable store.
3142 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3150 /* there are two cases: the object is too big or we run out of space in the TLAB */
3151 /* we also reach here when the thread does its first allocation after a minor
3152 * collection, since the tlab_ variables are initialized to NULL.
3153 * there can be another case (from ORP), if we cooperate with the runtime a bit:
3154 * objects that need finalizers can have the high bit set in their size
3155 * so the above check fails and we can readily add the object to the queue.
3156 * This avoids taking again the GC lock when registering, but this is moot when
3157 * doing thread-local allocation, so it may not be a good idea.
3160 if (size > MAX_SMALL_OBJ_SIZE) {
3161 /* get ready for possible collection */
3162 update_current_thread_stack (&dummy);
3164 p = alloc_large_inner (vtable, size);
3166 if (tlab_next >= tlab_real_end) {
3168 * Run out of space in the TLAB. When this happens, some amount of space
3169 * remains in the TLAB, but not enough to satisfy the current allocation
3170 * request. Currently, we retire the TLAB in all cases, later we could
3171 * keep it if the remaining space is above a treshold, and satisfy the
3172 * allocation directly from the nursery.
3175 /* when running in degraded mode, we continue allocing that way
3176 * for a while, to decrease the number of useless nursery collections.
3178 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
3179 p = alloc_degraded (vtable, size);
3184 if (size > tlab_size) {
3185 /* Allocate directly from the nursery */
3186 if (nursery_next + size >= nursery_frag_real_end) {
3187 if (!search_fragment_for_size (size)) {
3188 /* get ready for possible collection */
3189 update_current_thread_stack (&dummy);
3190 minor_collect_or_expand_inner (size);
3191 if (degraded_mode) {
3192 p = alloc_degraded (vtable, size);
3199 p = (void*)nursery_next;
3200 nursery_next += size;
3201 if (nursery_next > nursery_frag_real_end) {
3206 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3207 memset (p, 0, size);
3210 DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size)));
3212 if (nursery_next + tlab_size >= nursery_frag_real_end) {
3213 res = search_fragment_for_size (tlab_size);
3215 /* get ready for possible collection */
3216 update_current_thread_stack (&dummy);
3217 minor_collect_or_expand_inner (tlab_size);
3218 if (degraded_mode) {
3219 p = alloc_degraded (vtable, size);
3226 /* Allocate a new TLAB from the current nursery fragment */
3227 tlab_start = nursery_next;
3228 nursery_next += tlab_size;
3229 tlab_next = tlab_start;
3230 tlab_real_end = tlab_start + tlab_size;
3231 tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size);
3233 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3234 memset (tlab_start, 0, tlab_size);
3236 /* Allocate from the TLAB */
3237 p = (void*)tlab_next;
3239 g_assert (tlab_next <= tlab_real_end);
3241 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3244 /* Reached tlab_temp_end */
3246 /* record the scan start so we can find pinned objects more easily */
3247 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3248 /* we just bump tlab_temp_end as well */
3249 tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE);
3250 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end));
3254 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3263 * To be used for interned strings and possibly MonoThread, reflection handles.
3264 * We may want to explicitly free these objects.
3267 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
3269 /* FIXME: handle OOM */
3271 size += ALLOC_ALIGN - 1;
3272 size &= ~(ALLOC_ALIGN - 1);
3274 if (size > MAX_FREELIST_SIZE) {
3275 update_current_thread_stack (&p);
3276 /* large objects are always pinned anyway */
3277 p = alloc_large_inner (vtable, size);
3279 p = alloc_from_freelist (size);
3280 memset (p, 0, size);
3282 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3289 * ######################################################################
3290 * ######## Finalization support
3291 * ######################################################################
3295 * this is valid for the nursery: if the object has been forwarded it means it's
3296 * still refrenced from a root. If it is pinned it's still alive as well.
3297 * Return TRUE if @obj is ready to be finalized.
3299 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
3302 finalize_in_range (char *start, char *end)
3304 FinalizeEntry *entry, *prev;
3308 for (i = 0; i < finalizable_hash_size; ++i) {
3310 for (entry = finalizable_hash [i]; entry;) {
3311 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3312 if (object_is_fin_ready (entry->object)) {
3314 FinalizeEntry *next;
3315 /* remove and put in fin_ready_list */
3317 prev->next = entry->next;
3319 finalizable_hash [i] = entry->next;
3321 num_ready_finalizers++;
3322 num_registered_finalizers--;
3323 entry->next = fin_ready_list;
3324 fin_ready_list = entry;
3325 /* Make it survive */
3326 from = entry->object;
3327 entry->object = copy_object (entry->object, start, end);
3328 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3332 /* update pointer */
3333 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3334 entry->object = copy_object (entry->object, start, end);
3338 entry = entry->next;
3344 null_link_in_range (char *start, char *end)
3346 FinalizeEntry *entry, *prev;
3348 for (i = 0; i < disappearing_link_hash_size; ++i) {
3350 for (entry = disappearing_link_hash [i]; entry;) {
3351 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3352 if (object_is_fin_ready (entry->object)) {
3353 void **p = entry->data;
3356 /* remove from list */
3358 prev->next = entry->next;
3360 disappearing_link_hash [i] = entry->next;
3361 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, entry->object));
3363 free_internal_mem (entry);
3365 num_disappearing_links--;
3369 /* update pointer if it's moved
3370 * FIXME: what if an object is moved earlier?
3372 entry->object = copy_object (entry->object, start, end);
3373 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->data, entry->object));
3375 *link = entry->object;
3379 entry = entry->next;
3385 * mono_gc_finalizers_for_domain:
3386 * @domain: the unloading appdomain
3387 * @out_array: output array
3388 * @out_size: size of output array
3390 * Store inside @out_array up to @out_size objects that belong to the unloading
3391 * appdomain @domain. Returns the number of stored items. Can be called repeteadly
3392 * until it returns 0.
3393 * The items are removed from the finalizer data structure, so the caller is supposed
3395 * @out_array should be on the stack to allow the GC to know the objects are still alive.
3398 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3400 FinalizeEntry *entry, *prev;
3402 if (no_finalize || !out_size || !out_array)
3406 for (i = 0; i < finalizable_hash_size; ++i) {
3408 for (entry = finalizable_hash [i]; entry;) {
3409 if (mono_object_domain (entry->object) == domain) {
3410 FinalizeEntry *next;
3411 /* remove and put in out_array */
3413 prev->next = entry->next;
3415 finalizable_hash [i] = entry->next;
3417 num_registered_finalizers--;
3418 out_array [count ++] = entry->object;
3419 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
3421 if (count == out_size) {
3428 entry = entry->next;
3436 rehash_fin_table (void)
3440 FinalizeEntry **new_hash;
3441 FinalizeEntry *entry, *next;
3442 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3444 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3445 for (i = 0; i < finalizable_hash_size; ++i) {
3446 for (entry = finalizable_hash [i]; entry; entry = next) {
3447 hash = mono_object_hash (entry->object) % new_size;
3449 entry->next = new_hash [hash];
3450 new_hash [hash] = entry;
3453 free_internal_mem (finalizable_hash);
3454 finalizable_hash = new_hash;
3455 finalizable_hash_size = new_size;
3459 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3461 FinalizeEntry *entry, *prev;
3465 hash = mono_object_hash (obj);
3467 if (num_registered_finalizers >= finalizable_hash_size * 2)
3468 rehash_fin_table ();
3469 hash %= finalizable_hash_size;
3471 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3472 if (entry->object == obj) {
3474 entry->data = user_data;
3476 /* remove from the list */
3478 prev->next = entry->next;
3480 finalizable_hash [hash] = entry->next;
3481 num_registered_finalizers--;
3482 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3483 free_internal_mem (entry);
3491 /* request to deregister, but already out of the list */
3495 entry = get_internal_mem (sizeof (FinalizeEntry));
3496 entry->object = obj;
3497 entry->data = user_data;
3498 entry->next = finalizable_hash [hash];
3499 finalizable_hash [hash] = entry;
3500 num_registered_finalizers++;
3501 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3506 rehash_dislink (void)
3510 FinalizeEntry **new_hash;
3511 FinalizeEntry *entry, *next;
3512 int new_size = g_spaced_primes_closest (num_disappearing_links);
3514 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3515 for (i = 0; i < disappearing_link_hash_size; ++i) {
3516 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3517 hash = mono_aligned_addr_hash (entry->data) % new_size;
3519 entry->next = new_hash [hash];
3520 new_hash [hash] = entry;
3523 free_internal_mem (disappearing_link_hash);
3524 disappearing_link_hash = new_hash;
3525 disappearing_link_hash_size = new_size;
3529 mono_gc_register_disappearing_link (MonoObject *obj, void *link)
3531 FinalizeEntry *entry, *prev;
3535 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3537 /* FIXME: add check that link is not in the heap */
3538 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3539 entry = disappearing_link_hash [hash];
3541 for (; entry; entry = entry->next) {
3542 /* link already added */
3543 if (link == entry->data) {
3544 /* NULL obj means remove */
3547 prev->next = entry->next;
3549 disappearing_link_hash [hash] = entry->next;
3550 num_disappearing_links--;
3551 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3552 free_internal_mem (entry);
3554 entry->object = obj; /* we allow the change of object */
3561 entry = get_internal_mem (sizeof (FinalizeEntry));
3562 entry->object = obj;
3564 entry->next = disappearing_link_hash [hash];
3565 disappearing_link_hash [hash] = entry;
3566 num_disappearing_links++;
3567 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
3572 mono_gc_invoke_finalizers (void)
3574 FinalizeEntry *entry;
3577 /* FIXME: batch to reduce lock contention */
3578 while (fin_ready_list) {
3580 entry = fin_ready_list;
3582 fin_ready_list = entry->next;
3583 num_ready_finalizers--;
3584 obj = entry->object;
3585 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
3589 void (*callback)(void *, void*) = entry->data;
3591 obj = entry->object;
3593 /* the object is on the stack so it is pinned */
3594 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3595 callback (obj, NULL);
3596 free_internal_mem (entry);
3603 mono_gc_pending_finalizers (void)
3605 return fin_ready_list != NULL;
3608 /* Negative value to remove */
3610 mono_gc_add_memory_pressure (gint64 value)
3612 /* FIXME: Use interlocked functions */
3614 memory_pressure += value;
3619 * ######################################################################
3620 * ######## registered roots support
3621 * ######################################################################
3629 RootRecord **new_hash;
3630 RootRecord *entry, *next;
3631 int new_size = g_spaced_primes_closest (num_roots_entries);
3633 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3634 for (i = 0; i < roots_hash_size; ++i) {
3635 for (entry = roots_hash [i]; entry; entry = next) {
3636 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3638 entry->next = new_hash [hash];
3639 new_hash [hash] = entry;
3642 free_internal_mem (roots_hash);
3643 roots_hash = new_hash;
3644 roots_hash_size = new_size;
3648 * We do not coalesce roots.
3651 mono_gc_register_root (char *start, size_t size, void *descr)
3653 RootRecord *new_root;
3654 unsigned int hash = mono_aligned_addr_hash (start);
3656 if (num_roots_entries >= roots_hash_size * 2)
3658 hash %= roots_hash_size;
3659 for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) {
3660 /* we allow changing the size and the descriptor (for thread statics etc) */
3661 if (new_root->start_root == start) {
3662 size_t old_size = new_root->end_root - new_root->start_root;
3663 new_root->end_root = new_root->start_root + size;
3664 new_root->root_desc = (mword)descr;
3666 roots_size -= old_size;
3671 new_root = get_internal_mem (sizeof (RootRecord));
3673 new_root->start_root = start;
3674 new_root->end_root = new_root->start_root + size;
3675 new_root->root_desc = (mword)descr;
3677 num_roots_entries++;
3678 new_root->next = roots_hash [hash];
3679 roots_hash [hash] = new_root;
3680 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
3690 mono_gc_deregister_root (char* addr)
3692 RootRecord *tmp, *prev = NULL;
3693 unsigned int hash = mono_aligned_addr_hash (addr);
3695 hash %= roots_hash_size;
3696 tmp = roots_hash [hash];
3698 if (tmp->start_root == (char*)addr) {
3700 prev->next = tmp->next;
3702 roots_hash [hash] = tmp->next;
3703 roots_size -= (tmp->end_root - tmp->start_root);
3704 num_roots_entries--;
3705 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
3706 free_internal_mem (tmp);
3716 * ######################################################################
3717 * ######## Thread handling (stop/start code)
3718 * ######################################################################
3721 /* eventually share with MonoThread? */
3722 typedef struct _SgenThreadInfo SgenThreadInfo;
3724 struct _SgenThreadInfo {
3725 SgenThreadInfo *next;
3726 ARCH_THREAD_TYPE id;
3727 unsigned int stop_count; /* to catch duplicate signals */
3732 char **tlab_next_addr;
3733 char **tlab_start_addr;
3734 char **tlab_temp_end_addr;
3735 char **tlab_real_end_addr;
3736 RememberedSet *remset;
3739 /* FIXME: handle large/small config */
3740 #define THREAD_HASH_SIZE 11
3741 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
3743 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
3745 #if USE_SIGNAL_BASED_START_STOP_WORLD
3747 static sem_t suspend_ack_semaphore;
3748 static unsigned int global_stop_count = 0;
3749 static int suspend_signal_num = SIGPWR;
3750 static int restart_signal_num = SIGXCPU;
3751 static sigset_t suspend_signal_mask;
3752 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
3754 /* LOCKING: assumes the GC lock is held */
3755 static SgenThreadInfo*
3756 thread_info_lookup (ARCH_THREAD_TYPE id)
3758 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3759 SgenThreadInfo *info;
3761 info = thread_table [hash];
3762 while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
3769 update_current_thread_stack (void *start)
3771 void *ptr = cur_thread_regs;
3772 SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
3773 info->stack_start = align_pointer (&ptr);
3774 ARCH_STORE_REGS (ptr);
3778 signal_desc (int signum)
3780 if (signum == suspend_signal_num)
3782 if (signum == restart_signal_num)
3787 /* LOCKING: assumes the GC lock is held */
3789 thread_handshake (int signum)
3791 int count, i, result;
3792 SgenThreadInfo *info;
3793 pthread_t me = pthread_self ();
3796 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3797 for (info = thread_table [i]; info; info = info->next) {
3798 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
3799 if (ARCH_THREAD_EQUALS (info->id, me)) {
3800 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
3803 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
3805 result = pthread_kill (info->id, signum);
3807 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
3810 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
3816 for (i = 0; i < count; ++i) {
3817 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
3818 if (errno != EINTR) {
3819 g_error ("sem_wait ()");
3826 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
3828 suspend_handler (int sig)
3830 SgenThreadInfo *info;
3833 int old_errno = errno;
3835 id = pthread_self ();
3836 info = thread_info_lookup (id);
3837 stop_count = global_stop_count;
3838 /* duplicate signal */
3839 if (0 && info->stop_count == stop_count) {
3843 /* update the remset info in the thread data structure */
3844 info->remset = remembered_set;
3846 * this includes the register values that the kernel put on the stack.
3847 * Write arch-specific code to only push integer regs and a more accurate
3850 info->stack_start = align_pointer (&id);
3852 /* notify the waiting thread */
3853 sem_post (&suspend_ack_semaphore);
3854 info->stop_count = stop_count;
3856 /* wait until we receive the restart signal */
3859 sigsuspend (&suspend_signal_mask);
3860 } while (info->signal != restart_signal_num);
3862 /* notify the waiting thread */
3863 sem_post (&suspend_ack_semaphore);
3869 restart_handler (int sig)
3871 SgenThreadInfo *info;
3872 int old_errno = errno;
3874 info = thread_info_lookup (pthread_self ());
3875 info->signal = restart_signal_num;
3880 static TV_DECLARE (stop_world_time);
3881 static unsigned long max_pause_usec = 0;
3883 /* LOCKING: assumes the GC lock is held */
3889 global_stop_count++;
3890 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
3891 TV_GETTIME (stop_world_time);
3892 count = thread_handshake (suspend_signal_num);
3893 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
3897 /* LOCKING: assumes the GC lock is held */
3899 restart_world (void)
3902 TV_DECLARE (end_sw);
3905 count = thread_handshake (restart_signal_num);
3906 TV_GETTIME (end_sw);
3907 usec = TV_ELAPSED (stop_world_time, end_sw);
3908 max_pause_usec = MAX (usec, max_pause_usec);
3909 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
3913 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
3916 * Identify objects pinned in a thread stack and its registers.
3919 pin_thread_data (void *start_nursery, void *end_nursery)
3922 SgenThreadInfo *info;
3924 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3925 for (info = thread_table [i]; info; info = info->next) {
3927 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3930 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3931 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
3934 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n"));
3935 conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery);
3939 find_pinning_ref_from_thread (char *obj, size_t size)
3942 SgenThreadInfo *info;
3943 char *endobj = obj + size;
3945 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3946 for (info = thread_table [i]; info; info = info->next) {
3947 char **start = (char**)info->stack_start;
3950 while (start < (char**)info->stack_end) {
3951 if (*start >= obj && *start < endobj) {
3952 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end));
3958 /* FIXME: check register */
3961 /* return TRUE if ptr points inside the managed heap */
3963 ptr_in_heap (void* ptr)
3965 mword p = (mword)ptr;
3966 if (p < lowest_heap_address || p >= highest_heap_address)
3968 /* FIXME: more checks */
3973 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
3979 /* FIXME: exclude stack locations */
3980 switch ((*p) & REMSET_TYPE_MASK) {
3981 case REMSET_LOCATION:
3983 if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) {
3984 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3985 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
3986 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3987 add_to_global_remset (ptr);
3989 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
3993 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
3994 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
3997 while (count-- > 0) {
3998 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3999 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
4000 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
4001 add_to_global_remset (ptr);
4006 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4007 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4009 scan_object (*ptr, start_nursery, end_nursery);
4012 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4013 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4016 scan_vtype ((char*)ptr, desc, start_nursery, end_nursery);
4019 g_assert_not_reached ();
4025 scan_from_remsets (void *start_nursery, void *end_nursery)
4028 SgenThreadInfo *info;
4029 RememberedSet *remset, *next;
4032 /* the global one */
4033 for (remset = global_remset; remset; remset = remset->next) {
4034 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4035 for (p = remset->data; p < remset->store_next;) {
4036 p = handle_remset (p, start_nursery, end_nursery, TRUE);
4039 /* the per-thread ones */
4040 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4041 for (info = thread_table [i]; info; info = info->next) {
4042 for (remset = info->remset; remset; remset = next) {
4043 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4044 for (p = remset->data; p < remset->store_next;) {
4045 p = handle_remset (p, start_nursery, end_nursery, FALSE);
4047 remset->store_next = remset->data;
4048 next = remset->next;
4049 remset->next = NULL;
4050 if (remset != info->remset) {
4051 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4052 free_internal_mem (remset);
4060 * Clear the info in the remembered sets: we're doing a major collection, so
4061 * the per-thread ones are not needed and the global ones will be reconstructed
4065 clear_remsets (void)
4068 SgenThreadInfo *info;
4069 RememberedSet *remset, *next;
4071 /* the global list */
4072 for (remset = global_remset; remset; remset = next) {
4073 remset->store_next = remset->data;
4074 next = remset->next;
4075 remset->next = NULL;
4076 if (remset != global_remset) {
4077 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4078 free_internal_mem (remset);
4081 /* the per-thread ones */
4082 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4083 for (info = thread_table [i]; info; info = info->next) {
4084 for (remset = info->remset; remset; remset = next) {
4085 remset->store_next = remset->data;
4086 next = remset->next;
4087 remset->next = NULL;
4088 if (remset != info->remset) {
4089 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4090 free_internal_mem (remset);
4098 * Clear the thread local TLAB variables for all threads.
4103 SgenThreadInfo *info;
4106 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4107 for (info = thread_table [i]; info; info = info->next) {
4108 /* A new TLAB will be allocated when the thread does its first allocation */
4109 *info->tlab_start_addr = NULL;
4110 *info->tlab_next_addr = NULL;
4111 *info->tlab_temp_end_addr = NULL;
4112 *info->tlab_real_end_addr = NULL;
4118 * Find the tlab_next value of the TLAB which contains ADDR.
4121 find_tlab_next_from_address (char *addr)
4123 SgenThreadInfo *info;
4126 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4127 for (info = thread_table [i]; info; info = info->next) {
4128 if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr)
4129 return *info->tlab_next_addr;
4136 /* LOCKING: assumes the GC lock is held */
4137 static SgenThreadInfo*
4138 gc_register_current_thread (void *addr)
4141 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
4144 info->id = ARCH_GET_THREAD ();
4145 info->stop_count = -1;
4148 info->stack_start = NULL;
4149 info->tlab_start_addr = &tlab_start;
4150 info->tlab_next_addr = &tlab_next;
4151 info->tlab_temp_end_addr = &tlab_temp_end;
4152 info->tlab_real_end_addr = &tlab_real_end;
4154 tlab_next_addr = &tlab_next;
4156 /* try to get it with attributes first */
4157 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
4161 pthread_attr_t attr;
4162 pthread_getattr_np (pthread_self (), &attr);
4163 pthread_attr_getstack (&attr, &sstart, &size);
4164 info->stack_end = (char*)sstart + size;
4165 pthread_attr_destroy (&attr);
4167 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
4168 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
4171 /* FIXME: we assume the stack grows down */
4172 gsize stack_bottom = (gsize)addr;
4173 stack_bottom += 4095;
4174 stack_bottom &= ~4095;
4175 info->stack_end = (char*)stack_bottom;
4179 /* hash into the table */
4180 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
4181 info->next = thread_table [hash];
4182 thread_table [hash] = info;
4184 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
4185 pthread_setspecific (remembered_set_key, remembered_set);
4186 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
4191 unregister_current_thread (void)
4194 SgenThreadInfo *prev = NULL;
4196 RememberedSet *rset;
4197 ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
4199 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4200 p = thread_table [hash];
4202 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
4203 while (!ARCH_THREAD_EQUALS (p->id, id)) {
4208 thread_table [hash] = p->next;
4210 prev->next = p->next;
4213 /* FIXME: transfer remsets if any */
4215 RememberedSet *next = rset->next;
4216 free_internal_mem (rset);
4223 unregister_thread (void *k)
4226 unregister_current_thread ();
4231 mono_gc_register_thread (void *baseptr)
4233 SgenThreadInfo *info;
4235 info = thread_info_lookup (ARCH_GET_THREAD ());
4237 info = gc_register_current_thread (baseptr);
4239 return info != NULL;
4242 #if USE_PTHREAD_INTERCEPT
4244 #undef pthread_create
4246 #undef pthread_detach
4249 void *(*start_routine) (void *);
4253 } SgenThreadStartInfo;
4256 gc_start_thread (void *arg)
4258 SgenThreadStartInfo *start_info = arg;
4259 SgenThreadInfo* info;
4260 void *t_arg = start_info->arg;
4261 void *(*start_func) (void*) = start_info->start_routine;
4265 info = gc_register_current_thread (&result);
4267 sem_post (&(start_info->registered));
4268 result = start_func (t_arg);
4270 * this is done by the pthread key dtor
4272 unregister_current_thread ();
4280 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
4282 SgenThreadStartInfo *start_info;
4285 start_info = malloc (sizeof (SgenThreadStartInfo));
4288 sem_init (&(start_info->registered), 0, 0);
4289 start_info->arg = arg;
4290 start_info->start_routine = start_routine;
4292 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
4294 while (sem_wait (&(start_info->registered)) != 0) {
4295 /*if (EINTR != errno) ABORT("sem_wait failed"); */
4298 sem_destroy (&(start_info->registered));
4304 mono_gc_pthread_join (pthread_t thread, void **retval)
4306 return pthread_join (thread, retval);
4310 mono_gc_pthread_detach (pthread_t thread)
4312 return pthread_detach (thread);
4315 #endif /* USE_PTHREAD_INTERCEPT */
4318 * ######################################################################
4319 * ######## Write barriers
4320 * ######################################################################
4323 static RememberedSet*
4324 alloc_remset (int size, gpointer id) {
4325 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
4326 res->store_next = res->data;
4327 res->end_set = res->data + size;
4329 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
4334 * Note: the write barriers first do the needed GC work and then do the actual store:
4335 * this way the value is visible to the conservative GC scan after the write barrier
4336 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
4337 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
4338 * happens when we need to record which pointers contain references to the new generation.
4339 * The write barrier will be executed, but the pointer is still not stored.
4342 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
4345 if ((char*)field_ptr >= nursery_start && (char*)field_ptr < nursery_real_end) {
4346 *(void**)field_ptr = value;
4349 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
4350 rs = remembered_set;
4351 if (rs->store_next < rs->end_set) {
4352 *(rs->store_next++) = (mword)field_ptr;
4353 *(void**)field_ptr = value;
4356 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4357 rs->next = remembered_set;
4358 remembered_set = rs;
4359 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4360 *(rs->store_next++) = (mword)field_ptr;
4361 *(void**)field_ptr = value;
4365 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
4367 RememberedSet *rs = remembered_set;
4368 if ((char*)slot_ptr >= nursery_start && (char*)slot_ptr < nursery_real_end) {
4369 *(void**)slot_ptr = value;
4372 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
4373 if (rs->store_next < rs->end_set) {
4374 *(rs->store_next++) = (mword)slot_ptr;
4375 *(void**)slot_ptr = value;
4378 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4379 rs->next = remembered_set;
4380 remembered_set = rs;
4381 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4382 *(rs->store_next++) = (mword)slot_ptr;
4383 *(void**)slot_ptr = value;
4387 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
4389 RememberedSet *rs = remembered_set;
4390 if ((char*)slot_ptr >= nursery_start && (char*)slot_ptr < nursery_real_end)
4392 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
4393 if (rs->store_next + 1 < rs->end_set) {
4394 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4395 *(rs->store_next++) = count;
4398 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4399 rs->next = remembered_set;
4400 remembered_set = rs;
4401 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4402 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4403 *(rs->store_next++) = count;
4407 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4409 RememberedSet *rs = remembered_set;
4410 if ((char*)ptr >= nursery_start && (char*)ptr < nursery_real_end) {
4411 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4412 *(void**)ptr = value;
4415 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
4416 /* FIXME: ensure it is on the heap */
4417 if (rs->store_next < rs->end_set) {
4418 *(rs->store_next++) = (mword)ptr;
4419 *(void**)ptr = value;
4422 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4423 rs->next = remembered_set;
4424 remembered_set = rs;
4425 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4426 *(rs->store_next++) = (mword)ptr;
4427 *(void**)ptr = value;
4431 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
4433 RememberedSet *rs = remembered_set;
4434 if ((char*)dest >= nursery_start && (char*)dest < nursery_real_end) {
4437 DEBUG (1, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
4439 if (rs->store_next + 1 < rs->end_set) {
4440 *(rs->store_next++) = (mword)dest | REMSET_VTYPE;
4441 *(rs->store_next++) = (mword)klass->gc_descr;
4444 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4445 rs->next = remembered_set;
4446 remembered_set = rs;
4447 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4448 *(rs->store_next++) = (mword)dest | REMSET_VTYPE;
4449 *(rs->store_next++) = (mword)klass->gc_descr;
4453 * mono_gc_wbarrier_object:
4455 * Write barrier to call when obj is the result of a clone or copy of an object.
4458 mono_gc_wbarrier_object (MonoObject* obj)
4460 RememberedSet *rs = remembered_set;
4461 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4462 if (rs->store_next < rs->end_set) {
4463 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4466 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4467 rs->next = remembered_set;
4468 remembered_set = rs;
4469 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4470 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4474 * ######################################################################
4475 * ######## Collector debugging
4476 * ######################################################################
4479 const char*descriptor_types [] = {
4491 describe_ptr (char *ptr)
4493 GCMemSection *section;
4498 if ((ptr >= nursery_start) && (ptr < nursery_real_end)) {
4499 printf ("Pointer inside nursery.\n");
4501 for (section = section_list; section;) {
4502 if (ptr >= section->data && ptr < section->data + section->size)
4504 section = section->next;
4508 printf ("Pointer inside oldspace.\n");
4510 printf ("Pointer unknown.\n");
4515 // FIXME: Handle pointers to the inside of objects
4516 vtable = (MonoVTable*)LOAD_VTABLE (ptr);
4518 printf ("VTable: %p\n", vtable);
4519 if (vtable == NULL) {
4520 printf ("VTable is invalid (empty).\n");
4523 if (((char*)vtable >= nursery_start) && ((char*)vtable < nursery_real_end)) {
4524 printf ("VTable is invalid (points inside nursery).\n");
4527 printf ("Class: %s\n", vtable->klass->name);
4529 desc = ((GCVTable*)vtable)->desc;
4530 printf ("Descriptor: %lx\n", desc);
4533 printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]);
4537 find_in_remset_loc (mword *p, char *addr, gboolean *found)
4543 switch ((*p) & REMSET_TYPE_MASK) {
4544 case REMSET_LOCATION:
4545 if (*p == (mword)addr)
4549 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4551 if ((void**)addr >= ptr && (void**)addr < ptr + count)
4555 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4556 count = safe_object_get_size ((MonoObject*)ptr);
4557 count += (ALLOC_ALIGN - 1);
4558 count &= (ALLOC_ALIGN - 1);
4559 count /= sizeof (mword);
4560 if ((void**)addr >= ptr && (void**)addr < ptr + count)
4564 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4567 switch (desc & 0x7) {
4568 case DESC_TYPE_RUN_LENGTH:
4569 OBJ_RUN_LEN_SIZE (skip_size, desc, ptr);
4570 /* The descriptor includes the size of MonoObject */
4571 skip_size -= sizeof (MonoObject);
4572 if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer)))
4577 g_assert_not_reached ();
4582 g_assert_not_reached ();
4588 * Return whenever ADDR occurs in the remembered sets
4591 find_in_remsets (char *addr)
4594 SgenThreadInfo *info;
4595 RememberedSet *remset;
4597 gboolean found = FALSE;
4599 /* the global one */
4600 for (remset = global_remset; remset; remset = remset->next) {
4601 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4602 for (p = remset->data; p < remset->store_next;) {
4603 p = find_in_remset_loc (p, addr, &found);
4608 /* the per-thread ones */
4609 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4610 for (info = thread_table [i]; info; info = info->next) {
4611 for (remset = info->remset; remset; remset = remset->next) {
4612 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4613 for (p = remset->data; p < remset->store_next;) {
4614 p = find_in_remset_loc (p, addr, &found);
4626 #define HANDLE_PTR(ptr,obj) do { \
4627 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
4628 if (!find_in_remsets ((char*)(ptr))) { \
4629 fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
4630 g_assert_not_reached (); \
4636 * Check that each object reference inside the area which points into the nursery
4637 * can be found in the remembered sets.
4639 static void __attribute__((noinline))
4640 check_remsets_for_area (char *start, char *end)
4645 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
4647 new_obj_references = 0;
4648 obj_references_checked = 0;
4649 while (start < end) {
4650 if (!*(void**)start) {
4651 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
4654 vt = (GCVTable*)LOAD_VTABLE (start);
4655 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
4657 MonoObject *obj = (MonoObject*)start;
4658 g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
4662 if (type == DESC_TYPE_STRING) {
4663 STRING_SIZE (skip_size, start);
4667 } else if (type == DESC_TYPE_RUN_LENGTH) {
4668 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
4669 g_assert (skip_size);
4670 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
4674 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
4675 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
4676 skip_size *= mono_array_length ((MonoArray*)start);
4677 skip_size += sizeof (MonoArray);
4678 skip_size += (ALLOC_ALIGN - 1);
4679 skip_size &= ~(ALLOC_ALIGN - 1);
4680 OBJ_VECTOR_FOREACH_PTR (vt, start);
4681 if (((MonoArray*)start)->bounds) {
4682 /* account for the bounds */
4683 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
4688 } else if (type == DESC_TYPE_SMALL_BITMAP) {
4689 OBJ_BITMAP_SIZE (skip_size, desc, start);
4690 g_assert (skip_size);
4691 OBJ_BITMAP_FOREACH_PTR (desc,start);
4695 } else if (type == DESC_TYPE_LARGE_BITMAP) {
4696 skip_size = safe_object_get_size ((MonoObject*)start);
4697 skip_size += (ALLOC_ALIGN - 1);
4698 skip_size &= ~(ALLOC_ALIGN - 1);
4699 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
4703 } else if (type == DESC_TYPE_COMPLEX) {
4704 /* this is a complex object */
4705 skip_size = safe_object_get_size ((MonoObject*)start);
4706 skip_size += (ALLOC_ALIGN - 1);
4707 skip_size &= ~(ALLOC_ALIGN - 1);
4708 OBJ_COMPLEX_FOREACH_PTR (vt, start);
4712 } else if (type == DESC_TYPE_COMPLEX_ARR) {
4713 /* this is an array of complex structs */
4714 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
4715 skip_size *= mono_array_length ((MonoArray*)start);
4716 skip_size += sizeof (MonoArray);
4717 skip_size += (ALLOC_ALIGN - 1);
4718 skip_size &= ~(ALLOC_ALIGN - 1);
4719 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
4720 if (((MonoArray*)start)->bounds) {
4721 /* account for the bounds */
4722 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
4734 * Perform consistency check of the heap.
4736 * Assumes the world is stopped.
4739 check_consistency (void)
4741 GCMemSection *section;
4743 // Need to add more checks
4744 // FIXME: Create a general heap enumeration function and use that
4746 DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
4748 // Check that oldspace->newspace pointers are registered with the collector
4749 for (section = section_list; section; section = section->next) {
4750 if (section->role == MEMORY_ROLE_GEN0)
4752 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
4753 check_remsets_for_area (section->data, section->next_data);
4756 DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
4759 /* Check that the reference is valid */
4761 #define HANDLE_PTR(ptr,obj) do { \
4763 g_assert (safe_name (*(ptr)) != NULL); \
4770 * Perform consistency check on an object. Currently we only check that the
4771 * reference fields are valid.
4774 check_object (char *start)
4783 vt = (GCVTable*)LOAD_VTABLE (start);
4784 //type = vt->desc & 0x7;
4787 switch (desc & 0x7) {
4788 case DESC_TYPE_STRING:
4789 STRING_SIZE (skip_size, start);
4790 return start + skip_size;
4791 case DESC_TYPE_RUN_LENGTH:
4792 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
4793 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
4794 g_assert (skip_size);
4795 return start + skip_size;
4796 case DESC_TYPE_ARRAY:
4797 case DESC_TYPE_VECTOR:
4798 OBJ_VECTOR_FOREACH_PTR (vt, start);
4799 skip_size = safe_object_get_size ((MonoObject*)start);
4800 skip_size += (ALLOC_ALIGN - 1);
4801 skip_size &= ~(ALLOC_ALIGN - 1);
4802 return start + skip_size;
4803 case DESC_TYPE_SMALL_BITMAP:
4804 OBJ_BITMAP_FOREACH_PTR (desc,start);
4805 OBJ_BITMAP_SIZE (skip_size, desc, start);
4806 return start + skip_size;
4807 case DESC_TYPE_LARGE_BITMAP:
4808 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
4809 skip_size = safe_object_get_size ((MonoObject*)start);
4810 skip_size += (ALLOC_ALIGN - 1);
4811 skip_size &= ~(ALLOC_ALIGN - 1);
4812 return start + skip_size;
4813 case DESC_TYPE_COMPLEX:
4814 OBJ_COMPLEX_FOREACH_PTR (vt, start);
4815 /* this is a complex object */
4816 skip_size = safe_object_get_size ((MonoObject*)start);
4817 skip_size += (ALLOC_ALIGN - 1);
4818 skip_size &= ~(ALLOC_ALIGN - 1);
4819 return start + skip_size;
4820 case DESC_TYPE_COMPLEX_ARR:
4821 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
4822 /* this is an array of complex structs */
4823 skip_size = safe_object_get_size ((MonoObject*)start);
4824 skip_size += (ALLOC_ALIGN - 1);
4825 skip_size &= ~(ALLOC_ALIGN - 1);
4826 return start + skip_size;
4828 g_assert_not_reached ();
4833 * ######################################################################
4834 * ######## Other mono public interface functions.
4835 * ######################################################################
4839 mono_gc_collect (int generation)
4842 update_current_thread_stack (&generation);
4844 if (generation == 0) {
4845 collect_nursery (0);
4847 major_collection ();
4854 mono_gc_max_generation (void)
4860 mono_gc_collection_count (int generation)
4862 if (generation == 0)
4863 return num_minor_gcs;
4864 return num_major_gcs;
4868 mono_gc_get_used_size (void)
4871 GCMemSection *section;
4873 tot = los_memory_usage;
4874 for (section = section_list; section; section = section->next) {
4875 /* this is approximate... */
4876 tot += section->next_data - section->data;
4878 /* FIXME: account for pinned objects */
4884 mono_gc_get_heap_size (void)
4890 mono_gc_disable (void)
4898 mono_gc_enable (void)
4906 mono_object_is_alive (MonoObject* o)
4912 mono_gc_get_generation (MonoObject *obj)
4914 if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end)
4920 mono_gc_enable_events (void)
4925 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
4927 mono_gc_register_disappearing_link (obj, link_addr);
4932 mono_gc_weak_link_remove (void **link_addr)
4934 mono_gc_register_disappearing_link (NULL, link_addr);
4939 mono_gc_weak_link_get (void **link_addr)
4945 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
4947 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
4948 mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT);
4951 /* conservative scanning */
4952 DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits));
4957 mono_gc_alloc_fixed (size_t size, void *descr)
4959 /* FIXME: do a single allocation */
4960 void *res = calloc (1, size);
4963 if (!mono_gc_register_root (res, size, descr)) {
4971 mono_gc_free_fixed (void* addr)
4973 mono_gc_deregister_root (addr);
4978 mono_gc_is_gc_thread (void)
4982 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
4988 mono_gc_base_init (void)
4992 struct sigaction sinfo;
4994 LOCK_INIT (gc_mutex);
4996 if (gc_initialized) {
5000 pagesize = mono_pagesize ();
5001 gc_debug_file = stderr;
5002 if ((env = getenv ("MONO_GC_DEBUG"))) {
5003 opts = g_strsplit (env, ",", -1);
5004 for (ptr = opts; ptr && *ptr; ptr ++) {
5006 if (opt [0] >= '0' && opt [0] <= '9') {
5007 gc_debug_level = atoi (opt);
5012 char *rf = g_strdup_printf ("%s.%d", opt, getpid ());
5013 gc_debug_file = fopen (rf, "wb");
5015 gc_debug_file = stderr;
5018 } else if (!strcmp (opt, "collect-before-allocs")) {
5019 collect_before_allocs = TRUE;
5020 } else if (!strcmp (opt, "check-at-minor-collections")) {
5021 consistency_check_at_minor_collection = TRUE;
5023 fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env);
5024 fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|<option>]+ where l is a debug level 0-9.\n");
5025 fprintf (stderr, "Valid options are: collect-before-allocs, check-at-minor-collections.\n");
5032 sem_init (&suspend_ack_semaphore, 0, 0);
5034 sigfillset (&sinfo.sa_mask);
5035 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
5036 sinfo.sa_handler = suspend_handler;
5037 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
5038 g_error ("failed sigaction");
5041 sinfo.sa_handler = restart_handler;
5042 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
5043 g_error ("failed sigaction");
5046 sigfillset (&suspend_signal_mask);
5047 sigdelset (&suspend_signal_mask, restart_signal_num);
5049 global_remset = alloc_remset (1024, NULL);
5050 global_remset->next = NULL;
5052 pthread_key_create (&remembered_set_key, unregister_thread);
5053 gc_initialized = TRUE;
5055 mono_gc_register_thread (&sinfo);
5063 /* FIXME: Do this in the JIT, where specialized allocation sequences can be created
5064 * for each class. This is currently not easy to do, as it is hard to generate basic
5065 * blocks + branches, but it is easy with the linear IL codebase.
5068 create_allocator (int atype)
5070 int tlab_next_addr_offset = -1;
5071 int tlab_temp_end_offset = -1;
5072 int p_var, size_var, tlab_next_addr_var, new_next_var;
5073 guint32 slowpath_branch;
5074 MonoMethodBuilder *mb;
5076 MonoMethodSignature *csig;
5077 static gboolean registered = FALSE;
5079 MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
5080 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5082 g_assert (tlab_next_addr_offset != -1);
5083 g_assert (tlab_temp_end_offset != -1);
5085 g_assert (atype == ATYPE_NORMAL);
5088 mono_register_jit_icall (mono_gc_alloc_obj, "mono_gc_alloc_obj", mono_create_icall_signature ("object ptr int"), FALSE);
5092 csig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
5093 csig->ret = &mono_defaults.object_class->byval_arg;
5094 csig->params [0] = &mono_defaults.int_class->byval_arg;
5096 mb = mono_mb_new (mono_defaults.object_class, "Alloc", MONO_WRAPPER_ALLOC);
5097 size_var = mono_mb_add_local (mb, &mono_defaults.int32_class->byval_arg);
5098 /* size = vtable->klass->instance_size; */
5099 mono_mb_emit_ldarg (mb, 0);
5100 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoVTable, klass));
5101 mono_mb_emit_byte (mb, CEE_ADD);
5102 mono_mb_emit_byte (mb, CEE_LDIND_I);
5103 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoClass, instance_size));
5104 mono_mb_emit_byte (mb, CEE_ADD);
5105 /* FIXME: assert instance_size stays a 4 byte integer */
5106 mono_mb_emit_byte (mb, CEE_LDIND_U4);
5107 mono_mb_emit_stloc (mb, size_var);
5109 /* size += ALLOC_ALIGN - 1; */
5110 mono_mb_emit_ldloc (mb, size_var);
5111 mono_mb_emit_icon (mb, ALLOC_ALIGN - 1);
5112 mono_mb_emit_byte (mb, CEE_ADD);
5113 /* size &= ~(ALLOC_ALIGN - 1); */
5114 mono_mb_emit_icon (mb, ~(ALLOC_ALIGN - 1));
5115 mono_mb_emit_byte (mb, CEE_AND);
5116 mono_mb_emit_stloc (mb, size_var);
5119 * We need to modify tlab_next, but the JIT only supports reading, so we read
5120 * another tls var holding its address instead.
5123 /* tlab_next_addr (local) = tlab_next_addr (TLS var) */
5124 tlab_next_addr_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5125 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5126 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5127 mono_mb_emit_i4 (mb, tlab_next_addr_offset);
5128 mono_mb_emit_stloc (mb, tlab_next_addr_var);
5130 /* p = (void**)tlab_next; */
5131 p_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5132 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5133 mono_mb_emit_byte (mb, CEE_LDIND_I);
5134 mono_mb_emit_stloc (mb, p_var);
5136 /* new_next = (char*)p + size; */
5137 new_next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5138 mono_mb_emit_ldloc (mb, p_var);
5139 mono_mb_emit_ldloc (mb, size_var);
5140 mono_mb_emit_byte (mb, CEE_CONV_I);
5141 mono_mb_emit_byte (mb, CEE_ADD);
5142 mono_mb_emit_stloc (mb, new_next_var);
5144 /* tlab_next = new_next */
5145 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5146 mono_mb_emit_ldloc (mb, new_next_var);
5147 mono_mb_emit_byte (mb, CEE_STIND_I);
5149 /* if (G_LIKELY (new_next < tlab_temp_end)) */
5150 mono_mb_emit_ldloc (mb, new_next_var);
5151 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5152 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5153 mono_mb_emit_i4 (mb, tlab_temp_end_offset);
5154 slowpath_branch = mono_mb_emit_short_branch (mb, MONO_CEE_BLT_UN_S);
5158 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5159 mono_mb_emit_byte (mb, CEE_MONO_NOT_TAKEN);
5161 /* FIXME: mono_gc_alloc_obj takes a 'size_t' as an argument, not an int32 */
5162 mono_mb_emit_ldarg (mb, 0);
5163 mono_mb_emit_ldloc (mb, size_var);
5164 mono_mb_emit_icall (mb, mono_gc_alloc_obj);
5165 mono_mb_emit_byte (mb, CEE_RET);
5168 mono_mb_patch_short_branch (mb, slowpath_branch);
5170 /* FIXME: Memory barrier */
5173 mono_mb_emit_ldloc (mb, p_var);
5174 mono_mb_emit_ldarg (mb, 0);
5175 mono_mb_emit_byte (mb, CEE_STIND_I);
5178 mono_mb_emit_ldloc (mb, p_var);
5179 mono_mb_emit_byte (mb, CEE_RET);
5181 res = mono_mb_create_method (mb, csig, 8);
5183 mono_method_get_header (res)->init_locals = FALSE;
5187 static MonoMethod* alloc_method_cache [ATYPE_NUM];
5190 * Generate an allocator method implementing the fast path of mono_gc_alloc_obj ().
5191 * The signature of the called method is:
5192 * object allocate (MonoVTable *vtable)
5195 mono_gc_get_managed_allocator (MonoVTable *vtable, gboolean for_box)
5197 int tlab_next_offset = -1;
5198 int tlab_temp_end_offset = -1;
5199 MonoClass *klass = vtable->klass;
5200 MONO_THREAD_VAR_OFFSET (tlab_next, tlab_next_offset);
5201 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5203 if (tlab_next_offset == -1 || tlab_temp_end_offset == -1)
5205 if (klass->instance_size > tlab_size)
5207 if (klass->has_finalize || klass->marshalbyref || (mono_profiler_get_events () & MONO_PROFILE_ALLOCATIONS))
5211 if (klass->byval_arg.type == MONO_TYPE_STRING)
5213 if (collect_before_allocs)
5216 return mono_gc_get_managed_allocator_by_type (0);
5220 mono_gc_get_managed_allocator_type (MonoMethod *managed_alloc)
5226 mono_gc_get_managed_allocator_by_type (int atype)
5230 mono_loader_lock ();
5231 res = alloc_method_cache [atype];
5233 res = alloc_method_cache [atype] = create_allocator (atype);
5234 mono_loader_unlock ();
5239 mono_gc_get_managed_allocator_types (void)
5244 #endif /* HAVE_SGEN_GC */