2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright (C) 2005-2006 Novell, Inc
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation provides always zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianess issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list is part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "metadata/method-builder.h"
148 #include "metadata/profiler-private.h"
149 #include "utils/mono-mmap.h"
151 #ifdef HAVE_VALGRIND_MEMCHECK_H
152 #include <valgrind/memcheck.h>
155 #define OPDEF(a,b,c,d,e,f,g,h,i,j) \
159 #include "mono/cil/opcode.def"
166 * ######################################################################
167 * ######## Types and constants used by the GC.
168 * ######################################################################
170 #if SIZEOF_VOID_P == 4
171 typedef guint32 mword;
173 typedef guint64 mword;
176 static int gc_initialized = 0;
177 static int gc_debug_level = 0;
178 static FILE* gc_debug_file;
179 /* If set, do a minor collection before every allocation */
180 static gboolean collect_before_allocs = FALSE;
181 /* If set, do a heap consistency check before each minor collection */
182 static gboolean consistency_check_at_minor_collection = FALSE;
185 mono_gc_flush_info (void)
187 fflush (gc_debug_file);
190 #define MAX_DEBUG_LEVEL 9
191 #define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0)
193 #define TV_DECLARE(name) struct timeval name
194 #define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
195 #define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec)
197 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
207 /* each request from the OS ends up in a GCMemSection */
208 typedef struct _GCMemSection GCMemSection;
209 struct _GCMemSection {
213 /* pointer where more data could be allocated if it fits */
217 * scan starts is an array of pointers to objects equally spaced in the allocation area
218 * They let use quickly find pinned objects from pinning pointers.
221 /* in major collections indexes in the pin_queue for objects that pin this section */
224 unsigned short num_scan_start;
228 /* large object space struct: 64+ KB */
229 /* we could make this limit much smaller to avoid memcpy copy
230 * and potentially have more room in the GC descriptor: need to measure
231 * This also means that such small OS objects will need to be
232 * allocated in a different way (using pinned chunks).
233 * We may want to put large but smaller than 64k objects in the fixed space
234 * when we move the object from one generation to another (to limit the
235 * pig in the snake effect).
236 * Note: it may be worth to have an optimized copy function, since we can
237 * assume that objects are aligned and have a multiple of 8 size.
238 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
239 * true if MONO_ZERO_LEN_ARRAY is nonzero.
241 typedef struct _LOSObject LOSObject;
244 mword size; /* this is the object size */
245 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
248 char data [MONO_ZERO_LEN_ARRAY];
251 /* Pinned objects are allocated in the LOS space if bigger than half a page
252 * or from freelists otherwise. We assume that pinned objects are relatively few
253 * and they have a slow dying speed (like interned strings, thread objects).
254 * As such they will be collected only at major collections.
255 * free lists are not global: when we need memory we allocate a PinnedChunk.
256 * Each pinned chunk is made of several pages, the first of wich is used
257 * internally for bookeeping (here think of a page as 4KB). The bookeeping
258 * includes the freelists vectors and info about the object size of each page
259 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
260 * a size is assigned to it, the page is divided in the proper chunks and each
261 * chunk is added to the freelist. To not waste space, the remaining space in the
262 * first page is used as objects of size 16 or 32 (need to measure which are more
264 * We use this same structure to allocate memory used internally by the GC, so
265 * we never use malloc/free if we need to alloc during collection: the world is stopped
266 * and malloc/free will deadlock.
267 * When we want to iterate over pinned objects, we just scan a page at a time
268 * linearly according to the size of objects in the page: the next pointer used to link
269 * the items in the freelist uses the same word as the vtable. Since we keep freelists
270 * for each pinned chunk, if the word points outside the pinned chunk it means
272 * We could avoid this expensive scanning in creative ways. We could have a policy
273 * of putting in the pinned space only objects we know about that have no struct fields
274 * with references and we can easily use a even expensive write barrier for them,
275 * since pointer writes on such objects should be rare.
276 * The best compromise is to just alloc interned strings and System.MonoType in them.
277 * It would be nice to allocate MonoThread in it, too: must check that we properly
278 * use write barriers so we don't have to do any expensive scanning of the whole pinned
279 * chunk list during minor collections. We can avoid it now because we alloc in it only
280 * reference-free objects.
282 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
283 #define MAX_FREELIST_SIZE 2048
284 #define PINNED_PAGE_SIZE (4096)
285 #define PINNED_CHUNK_MIN_SIZE (4096*8)
286 typedef struct _PinnedChunk PinnedChunk;
287 struct _PinnedChunk {
290 int *page_sizes; /* a 0 means the page is still unused */
293 void *data [1]; /* page sizes and free lists are stored here */
296 /* The method used to clear the nursery */
297 /* Clearing at nursery collections is the safest, but has bad interactions with caches.
298 * Clearing at TLAB creation is much faster, but more complex and it might expose hard
303 CLEAR_AT_TLAB_CREATION
304 } NurseryClearPolicy;
306 static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
309 * The young generation is divided into fragments. This is because
310 * we can hand one fragments to a thread for lock-less fast alloc and
311 * because the young generation ends up fragmented anyway by pinned objects.
312 * Once a collection is done, a list of fragments is created. When doing
313 * thread local alloc we use smallish nurseries so we allow new threads to
314 * allocate memory from gen0 without triggering a collection. Threads that
315 * are found to allocate lots of memory are given bigger fragments. This
316 * should make the finalizer thread use little nursery memory after a while.
317 * We should start assigning threads very small fragments: if there are many
318 * threads the nursery will be full of reserved space that the threads may not
319 * use at all, slowing down allocation speed.
320 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
321 * Allocation Buffers (TLABs).
323 typedef struct _Fragment Fragment;
327 char *fragment_start;
328 char *fragment_limit; /* the current soft limit for allocation */
332 /* the runtime can register areas of memory as roots: we keep two lists of roots,
333 * a pinned root set for conservatively scanned roots and a normal one for
334 * precisely scanned roots (currently implemented as a single list).
336 typedef struct _RootRecord RootRecord;
344 /* for use with write barriers */
345 typedef struct _RememberedSet RememberedSet;
346 struct _RememberedSet {
350 mword data [MONO_ZERO_LEN_ARRAY];
353 /* we have 4 possible values in the low 2 bits */
355 REMSET_LOCATION, /* just a pointer to the exact location */
356 REMSET_RANGE, /* range of pointer fields */
357 REMSET_OBJECT, /* mark all the object for scanning */
358 REMSET_VTYPE, /* a valuetype described by a gc descriptor */
359 REMSET_TYPE_MASK = 0x3
362 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
363 static pthread_key_t remembered_set_key;
364 static RememberedSet *global_remset;
365 static int store_to_global_remset = 0;
367 /* FIXME: later choose a size that takes into account the RememberedSet struct
368 * and doesn't waste any alloc paddin space.
370 #define DEFAULT_REMSET_SIZE 1024
371 static RememberedSet* alloc_remset (int size, gpointer id);
373 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
374 * no cast from a pointer to an integer
381 /* these bits are set in the object vtable: we could merge them since an object can be
382 * either pinned or forwarded but not both.
383 * We store them in the vtable slot because the bits are used in the sync block for
384 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
385 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
386 * would be an invalid combination for the monitor and hash code).
387 * The values are already shifted.
388 * The forwarding address is stored in the sync block.
390 #define FORWARDED_BIT 1
392 #define VTABLE_BITS_MASK 0x3
394 /* returns NULL if not forwarded, or the forwarded address */
395 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
396 /* set the forwarded address fw_addr for object obj */
397 #define forward_object(obj,fw_addr) do { \
398 ((mword*)(obj))[0] |= FORWARDED_BIT; \
399 ((mword*)(obj))[1] = (mword)(fw_addr); \
402 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
403 #define pin_object(obj) do { \
404 ((mword*)(obj))[0] |= PINNED_BIT; \
406 #define unpin_object(obj) do { \
407 ((mword*)(obj))[0] &= ~PINNED_BIT; \
410 #define ptr_in_nursery(ptr) ((char*)ptr >= nursery_start && (char*)ptr < nursery_real_end)
413 * Since we set bits in the vtable, use the macro to load it from the pointer to
414 * an object that is potentially pinned.
416 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
419 safe_name (void* obj)
421 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
422 return vt->klass->name;
426 safe_object_get_size (MonoObject* o)
428 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
429 if (klass == mono_defaults.string_class) {
430 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
431 } else if (klass->rank) {
432 MonoArray *array = (MonoArray*)o;
433 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
437 size += sizeof (MonoArrayBounds) * klass->rank;
441 /* from a created object: the class must be inited already */
442 return klass->instance_size;
446 static inline gboolean
447 is_maybe_half_constructed (MonoObject *o)
451 klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
452 if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) ||
453 (klass->rank && mono_array_length ((MonoArray*)o) == 0))
460 * ######################################################################
461 * ######## Global data.
462 * ######################################################################
464 static LOCK_DECLARE (gc_mutex);
465 static int gc_disabled = 0;
466 static int num_minor_gcs = 0;
467 static int num_major_gcs = 0;
469 /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */
470 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
471 #define DEFAULT_NURSERY_SIZE (1024*512*2)
472 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
473 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
474 /* to quickly find the head of an object pinned by a conservative address
475 * we keep track of the objects allocated for each SCAN_START_SIZE memory
476 * chunk in the nursery or other memory sections. Larger values have less
477 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
479 #define SCAN_START_SIZE (4096*2)
480 /* the minimum size of a fragment that we consider useful for allocation */
481 #define FRAGMENT_MIN_SIZE (512)
482 /* This is a fixed value used for pinned chunks, not the system pagesize */
483 #define FREELIST_PAGESIZE 4096
485 static mword pagesize = 4096;
486 static mword nursery_size = DEFAULT_NURSERY_SIZE;
487 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
488 static mword max_section_size = DEFAULT_MAX_SECTION;
489 static int section_size_used = 0;
490 static int degraded_mode = 0;
492 static LOSObject *los_object_list = NULL;
493 static mword los_memory_usage = 0;
494 static mword los_num_objects = 0;
495 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
496 static mword total_alloc = 0;
497 /* use this to tune when to do a major/minor collection */
498 static mword memory_pressure = 0;
500 static GCMemSection *section_list = NULL;
501 static GCMemSection *nursery_section = NULL;
502 static mword lowest_heap_address = ~(mword)0;
503 static mword highest_heap_address = 0;
505 typedef struct _FinalizeEntry FinalizeEntry;
506 struct _FinalizeEntry {
509 void *data; /* can be a disappearing link or the data for the finalizer */
510 /* Note we could use just one pointer if we don't support multiple callbacks
511 * for finalizers and per-finalizer data and if we store the obj pointers
512 * in the link like libgc does
517 * The finalizable hash has the object as the key, the
518 * disappearing_link hash, has the link address as key.
520 static FinalizeEntry **finalizable_hash = NULL;
521 /* objects that are ready to be finalized */
522 static FinalizeEntry *fin_ready_list = NULL;
523 /* disappearing links use the same structure but a different list */
524 static FinalizeEntry **disappearing_link_hash = NULL;
525 static mword disappearing_link_hash_size = 0;
526 static mword finalizable_hash_size = 0;
528 static int num_registered_finalizers = 0;
529 static int num_ready_finalizers = 0;
530 static int num_disappearing_links = 0;
531 static int no_finalize = 0;
533 /* keep each size a multiple of ALLOC_ALIGN */
534 /* on 64 bit systems 8 is likely completely unused. */
535 static const int freelist_sizes [] = {
536 8, 16, 24, 32, 40, 48, 64, 80,
537 96, 128, 160, 192, 224, 256, 320, 384,
538 448, 512, 584, 680, 816, 1024, 1360, 2048};
539 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
541 static char* max_pinned_chunk_addr = NULL;
542 static char* min_pinned_chunk_addr = (char*)-1;
543 /* pinned_chunk_list is used for allocations of objects that are never moved */
544 static PinnedChunk *pinned_chunk_list = NULL;
545 /* internal_chunk_list is used for allocating structures needed by the GC */
546 static PinnedChunk *internal_chunk_list = NULL;
549 obj_is_from_pinned_alloc (char *p)
551 PinnedChunk *chunk = pinned_chunk_list;
552 for (; chunk; chunk = chunk->next) {
553 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
559 /* registered roots: the key to the hash is the root start address */
560 static RootRecord **roots_hash = NULL;
561 static int roots_hash_size = 0;
562 static mword roots_size = 0; /* amount of memory in the root set */
563 static int num_roots_entries = 0;
566 * The current allocation cursors
567 * We allocate objects in the nursery.
568 * The nursery is the area between nursery_start and nursery_real_end.
569 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
570 * from nursery fragments.
571 * tlab_next is the pointer to the space inside the TLAB where the next object will
573 * tlab_temp_end is the pointer to the end of the temporary space reserved for
574 * the allocation: it allows us to set the scan starts at reasonable intervals.
575 * tlab_real_end points to the end of the TLAB.
576 * nursery_frag_real_end points to the end of the currently used nursery fragment.
577 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
578 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
579 * At the next allocation, the area of the nursery where objects can be present is
580 * between MIN(nursery_first_pinned_start, first_fragment_start) and
581 * MAX(nursery_last_pinned_end, nursery_frag_real_end)
583 static char *nursery_start = NULL;
586 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
587 * variables for next+temp_end ?
589 static __thread char *tlab_start;
590 static __thread char *tlab_next;
591 static __thread char *tlab_temp_end;
592 static __thread char *tlab_real_end;
593 /* Used by the managed allocator */
594 static __thread char **tlab_next_addr;
595 static char *nursery_next = NULL;
596 static char *nursery_frag_real_end = NULL;
597 static char *nursery_real_end = NULL;
598 static char *nursery_first_pinned_start = NULL;
599 static char *nursery_last_pinned_end = NULL;
601 /* The size of a TLAB */
602 /* The bigger the value, the less often we have to go to the slow path to allocate a new
603 * one, but the more space is wasted by threads not allocating much memory.
605 * FIXME: Make this self-tuning for each thread.
607 static guint32 tlab_size = (1024 * 4);
609 /* fragments that are free and ready to be used for allocation */
610 static Fragment *nursery_fragments = NULL;
611 /* freeelist of fragment structures */
612 static Fragment *fragment_freelist = NULL;
615 * used when moving the objects
616 * When the nursery is collected, objects are copied to to_space.
617 * The area between to_space and gray_objects is used as a stack
618 * of objects that need their fields checked for more references
620 * We should optimize somehow this mechanism to avoid rescanning
621 * ptr-free objects. The order is also probably not optimal: need to
622 * test cache misses and other graph traversal orders.
624 static char *to_space = NULL;
625 static char *gray_objects = NULL;
626 static char *to_space_end = NULL;
627 static GCMemSection *to_space_section = NULL;
629 /* objects bigger then this go into the large object space */
630 #define MAX_SMALL_OBJ_SIZE 0xffff
633 * ######################################################################
634 * ######## Macros and function declarations.
635 * ######################################################################
638 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
639 if ((mword)(low) < lowest_heap_address) \
640 lowest_heap_address = (mword)(low); \
641 if ((mword)(high) > highest_heap_address) \
642 highest_heap_address = (mword)(high); \
646 align_pointer (void *ptr)
648 mword p = (mword)ptr;
649 p += sizeof (gpointer) - 1;
650 p &= ~ (sizeof (gpointer) - 1);
654 /* forward declarations */
655 static void* get_internal_mem (size_t size);
656 static void free_internal_mem (void *addr);
657 static void* get_os_memory (size_t size, int activate);
658 static void free_os_memory (void *addr, size_t size);
659 static void report_internal_mem_usage (void);
661 static int stop_world (void);
662 static int restart_world (void);
663 static void pin_thread_data (void *start_nursery, void *end_nursery);
664 static void scan_from_remsets (void *start_nursery, void *end_nursery);
665 static void find_pinning_ref_from_thread (char *obj, size_t size);
666 static void update_current_thread_stack (void *start);
667 static GCMemSection* alloc_section (size_t size);
668 static void finalize_in_range (char *start, char *end);
669 static void null_link_in_range (char *start, char *end);
670 static gboolean search_fragment_for_size (size_t size);
671 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
672 static void clear_remsets (void);
673 static void clear_tlabs (void);
674 static char *find_tlab_next_from_address (char *addr);
675 static void sweep_pinned_objects (void);
676 static void scan_from_pinned_objects (char *addr_start, char *addr_end);
677 static void free_large_object (LOSObject *obj);
678 static void free_mem_section (GCMemSection *section);
680 void describe_ptr (char *ptr);
681 void check_consistency (void);
682 char* check_object (char *start);
685 * ######################################################################
686 * ######## GC descriptors
687 * ######################################################################
688 * Used to quickly get the info the GC needs about an object: size and
689 * where the references are held.
691 /* objects are aligned to 8 bytes boundaries
692 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
693 * The low 3 bits define the type of the descriptor. The other bits
694 * depend on the type.
695 * As a general rule the 13 remaining low bits define the size, either
696 * of the whole object or of the elements in the arrays. While for objects
697 * the size is already in bytes, for arrays we need to shift, because
698 * array elements might be smaller than 8 bytes. In case of arrays, we
699 * use two bits to describe what the additional high bits represents,
700 * so the default behaviour can handle element sizes less than 2048 bytes.
701 * The high 16 bits, if 0 it means the object is pointer-free.
702 * This design should make it easy and fast to skip over ptr-free data.
703 * The first 4 types should cover >95% of the objects.
704 * Note that since the size of objects is limited to 64K, larger objects
705 * will be allocated in the large object heap.
706 * If we want 4-bytes alignment, we need to put vector and small bitmap
710 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
711 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
712 DESC_TYPE_STRING, /* nothing */
713 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
714 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
715 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
716 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
717 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
718 /* subtypes for arrays and vectors */
719 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
720 DESC_TYPE_V_REFS, /* all the array elements are refs */
721 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
722 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
725 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
726 #define LOW_TYPE_BITS 3
727 #define SMALL_BITMAP_SHIFT 16
728 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
729 #define VECTOR_INFO_SHIFT 14
730 #define VECTOR_ELSIZE_SHIFT 3
731 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
732 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
733 #define SMALL_SIZE_MASK 0xfff8
734 #define MAX_ELEMENT_SIZE 0x3ff
735 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
736 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
737 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
738 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
739 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
741 #define ALLOC_ALIGN 8
744 /* Root bitmap descriptors are simpler: the lower two bits describe the type
745 * and we either have 30/62 bitmap bits or nibble-based run-length,
746 * or a complex descriptor
749 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
752 ROOT_DESC_LARGE_BITMAP,
753 ROOT_DESC_TYPE_MASK = 0x3,
754 ROOT_DESC_TYPE_SHIFT = 2,
757 static gsize* complex_descriptors = NULL;
758 static int complex_descriptors_size = 0;
759 static int complex_descriptors_next = 0;
762 alloc_complex_descriptor (gsize *bitmap, int numbits)
764 int nwords = numbits/GC_BITS_PER_WORD + 2;
769 res = complex_descriptors_next;
770 /* linear search, so we don't have duplicates with domain load/unload
771 * this should not be performance critical or we'd have bigger issues
772 * (the number and size of complex descriptors should be small).
774 for (i = 0; i < complex_descriptors_next; ) {
775 if (complex_descriptors [i] == nwords) {
777 for (j = 0; j < nwords - 1; ++j) {
778 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
788 i += complex_descriptors [i];
790 if (complex_descriptors_next + nwords > complex_descriptors_size) {
791 int new_size = complex_descriptors_size * 2 + nwords;
792 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
793 complex_descriptors_size = new_size;
795 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
796 complex_descriptors_next += nwords;
797 complex_descriptors [res] = nwords;
798 for (i = 0; i < nwords - 1; ++i) {
799 complex_descriptors [res + 1 + i] = bitmap [i];
800 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
807 * Descriptor builders.
810 mono_gc_make_descr_for_string (gsize *bitmap, int numbits)
812 return (void*) DESC_TYPE_STRING;
816 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
818 int first_set = -1, num_set = 0, last_set = -1, i;
820 size_t stored_size = obj_size;
821 stored_size += ALLOC_ALIGN - 1;
822 stored_size &= ~(ALLOC_ALIGN - 1);
823 for (i = 0; i < numbits; ++i) {
824 if (bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
831 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
832 /* check run-length encoding first: one byte offset, one byte number of pointers
833 * on 64 bit archs, we can have 3 runs, just one on 32.
834 * It may be better to use nibbles.
837 desc = DESC_TYPE_RUN_LENGTH | stored_size;
838 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size));
840 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
841 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
842 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
845 /* we know the 2-word header is ptr-free */
846 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
847 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
848 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
852 /* we know the 2-word header is ptr-free */
853 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
854 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
855 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
858 /* it's a complex object ... */
859 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
863 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
865 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
867 int first_set = -1, num_set = 0, last_set = -1, i;
868 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
869 for (i = 0; i < numbits; ++i) {
870 if (elem_bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
877 if (elem_size <= MAX_ELEMENT_SIZE) {
878 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
880 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
882 /* Note: we also handle structs with just ref fields */
883 if (num_set * sizeof (gpointer) == elem_size) {
884 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16));
886 /* FIXME: try run-len first */
887 /* Note: we can't skip the object header here, because it's not present */
888 if (last_set <= SMALL_BITMAP_SIZE) {
889 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
892 /* it's am array of complex structs ... */
893 desc = DESC_TYPE_COMPLEX_ARR;
894 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
898 /* helper macros to scan and traverse objects, macros because we resue them in many functions */
899 #define STRING_SIZE(size,str) do { \
900 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
901 (size) += (ALLOC_ALIGN - 1); \
902 (size) &= ~(ALLOC_ALIGN - 1); \
905 #define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \
906 (size) = (desc) & 0xfff8; \
909 #define OBJ_BITMAP_SIZE(size,desc,obj) do { \
910 (size) = (desc) & 0xfff8; \
913 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
914 #define PREFETCH(addr)
916 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
917 #define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \
918 if ((desc) & 0xffff0000) { \
919 /* there are pointers */ \
920 void **_objptr_end; \
921 void **_objptr = (void**)(obj); \
922 _objptr += ((desc) >> 16) & 0xff; \
923 _objptr_end = _objptr + (((desc) >> 24) & 0xff); \
924 while (_objptr < _objptr_end) { \
925 HANDLE_PTR (_objptr, (obj)); \
931 /* a bitmap desc means that there are pointer references or we'd have
932 * choosen run-length, instead: add an assert to check.
934 #define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \
935 /* there are pointers */ \
936 void **_objptr = (void**)(obj); \
937 gsize _bmap = (desc) >> 16; \
938 _objptr += OBJECT_HEADER_WORDS; \
941 HANDLE_PTR (_objptr, (obj)); \
948 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
949 /* there are pointers */ \
950 void **_objptr = (void**)(obj); \
951 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
952 _objptr += OBJECT_HEADER_WORDS; \
955 HANDLE_PTR (_objptr, (obj)); \
962 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
963 /* there are pointers */ \
964 void **_objptr = (void**)(obj); \
965 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
966 int bwords = (*bitmap_data) - 1; \
967 void **start_run = _objptr; \
970 MonoObject *myobj = (MonoObject*)obj; \
971 g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
973 while (bwords-- > 0) { \
974 gsize _bmap = *bitmap_data++; \
975 _objptr = start_run; \
976 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
979 HANDLE_PTR (_objptr, (obj)); \
984 start_run += GC_BITS_PER_WORD; \
988 /* this one is untested */
989 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
990 /* there are pointers */ \
991 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
992 int mbwords = (*mbitmap_data++) - 1; \
993 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
994 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
995 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
997 MonoObject *myobj = (MonoObject*)start; \
998 g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
1000 while (e_start < e_end) { \
1001 void **_objptr = (void**)e_start; \
1002 gsize *bitmap_data = mbitmap_data; \
1003 unsigned int bwords = mbwords; \
1004 while (bwords-- > 0) { \
1005 gsize _bmap = *bitmap_data++; \
1006 void **start_run = _objptr; \
1007 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
1009 if ((_bmap & 1)) { \
1010 HANDLE_PTR (_objptr, (obj)); \
1015 _objptr = start_run + GC_BITS_PER_WORD; \
1017 e_start += el_size; \
1021 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
1022 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
1023 if ((vt)->desc & 0xffffc000) { \
1024 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
1025 /* there are pointers */ \
1026 int etype = (vt)->desc & 0xc000; \
1027 if (etype == (DESC_TYPE_V_REFS << 14)) { \
1028 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
1029 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
1030 /* Note: this code can handle also arrays of struct with only references in them */ \
1031 while (p < end_refs) { \
1032 HANDLE_PTR (p, (obj)); \
1035 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
1036 int offset = ((vt)->desc >> 16) & 0xff; \
1037 int num_refs = ((vt)->desc >> 24) & 0xff; \
1038 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1039 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1040 while (e_start < e_end) { \
1041 void **p = (void**)e_start; \
1044 for (i = 0; i < num_refs; ++i) { \
1045 HANDLE_PTR (p + i, (obj)); \
1047 e_start += el_size; \
1049 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
1050 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1051 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1052 while (e_start < e_end) { \
1053 void **p = (void**)e_start; \
1054 gsize _bmap = (vt)->desc >> 16; \
1055 /* Note: there is no object header here to skip */ \
1057 if ((_bmap & 1)) { \
1058 HANDLE_PTR (p, (obj)); \
1063 e_start += el_size; \
1069 static mword new_obj_references = 0;
1070 static mword obj_references_checked = 0;
1073 #define HANDLE_PTR(ptr,obj) do { \
1074 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1075 new_obj_references++; \
1076 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1078 obj_references_checked++; \
1083 * ######################################################################
1084 * ######## Detecting and removing garbage.
1085 * ######################################################################
1086 * This section of code deals with detecting the objects no longer in use
1087 * and reclaiming the memory.
1089 static void __attribute__((noinline))
1090 scan_area (char *start, char *end)
1095 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1097 new_obj_references = 0;
1098 obj_references_checked = 0;
1099 while (start < end) {
1100 if (!*(void**)start) {
1101 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1104 vt = (GCVTable*)LOAD_VTABLE (start);
1105 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1107 MonoObject *obj = (MonoObject*)start;
1108 g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1112 if (type == DESC_TYPE_STRING) {
1113 STRING_SIZE (skip_size, start);
1117 } else if (type == DESC_TYPE_RUN_LENGTH) {
1118 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1119 g_assert (skip_size);
1120 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1124 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1125 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1126 skip_size *= mono_array_length ((MonoArray*)start);
1127 skip_size += sizeof (MonoArray);
1128 skip_size += (ALLOC_ALIGN - 1);
1129 skip_size &= ~(ALLOC_ALIGN - 1);
1130 OBJ_VECTOR_FOREACH_PTR (vt, start);
1131 if (type == DESC_TYPE_ARRAY) {
1132 /* account for the bounds */
1137 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1138 OBJ_BITMAP_SIZE (skip_size, desc, start);
1139 g_assert (skip_size);
1140 OBJ_BITMAP_FOREACH_PTR (desc,start);
1144 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1145 skip_size = safe_object_get_size ((MonoObject*)start);
1146 skip_size += (ALLOC_ALIGN - 1);
1147 skip_size &= ~(ALLOC_ALIGN - 1);
1148 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1152 } else if (type == DESC_TYPE_COMPLEX) {
1153 /* this is a complex object */
1154 skip_size = safe_object_get_size ((MonoObject*)start);
1155 skip_size += (ALLOC_ALIGN - 1);
1156 skip_size &= ~(ALLOC_ALIGN - 1);
1157 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1161 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1162 /* this is an array of complex structs */
1163 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1164 skip_size *= mono_array_length ((MonoArray*)start);
1165 skip_size += sizeof (MonoArray);
1166 skip_size += (ALLOC_ALIGN - 1);
1167 skip_size &= ~(ALLOC_ALIGN - 1);
1168 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1169 if (type == DESC_TYPE_ARRAY) {
1170 /* account for the bounds */
1179 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1180 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1181 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
1184 static void __attribute__((noinline))
1185 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1192 while (start < end) {
1193 if (!*(void**)start) {
1194 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1197 vt = (GCVTable*)LOAD_VTABLE (start);
1198 /* handle threads someway (maybe insert the root domain vtable?) */
1199 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1200 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1207 if (type == DESC_TYPE_STRING) {
1208 STRING_SIZE (skip_size, start);
1209 if (remove) memset (start, 0, skip_size);
1212 } else if (type == DESC_TYPE_RUN_LENGTH) {
1213 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1214 g_assert (skip_size);
1215 if (remove) memset (start, 0, skip_size);
1218 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1219 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1220 skip_size *= mono_array_length ((MonoArray*)start);
1221 skip_size += sizeof (MonoArray);
1222 skip_size += (ALLOC_ALIGN - 1);
1223 skip_size &= ~(ALLOC_ALIGN - 1);
1224 if (type == DESC_TYPE_ARRAY) {
1225 /* account for the bounds */
1227 if (remove) memset (start, 0, skip_size);
1230 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1231 OBJ_BITMAP_SIZE (skip_size, desc, start);
1232 g_assert (skip_size);
1233 if (remove) memset (start, 0, skip_size);
1236 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1237 skip_size = safe_object_get_size ((MonoObject*)start);
1238 skip_size += (ALLOC_ALIGN - 1);
1239 skip_size &= ~(ALLOC_ALIGN - 1);
1240 if (remove) memset (start, 0, skip_size);
1243 } else if (type == DESC_TYPE_COMPLEX) {
1244 /* this is a complex object */
1245 skip_size = safe_object_get_size ((MonoObject*)start);
1246 skip_size += (ALLOC_ALIGN - 1);
1247 skip_size &= ~(ALLOC_ALIGN - 1);
1248 if (remove) memset (start, 0, skip_size);
1251 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1252 /* this is an array of complex structs */
1253 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1254 skip_size *= mono_array_length ((MonoArray*)start);
1255 skip_size += sizeof (MonoArray);
1256 skip_size += (ALLOC_ALIGN - 1);
1257 skip_size &= ~(ALLOC_ALIGN - 1);
1258 if (type == DESC_TYPE_ARRAY) {
1259 /* account for the bounds */
1261 if (remove) memset (start, 0, skip_size);
1271 * When appdomains are unloaded we can easily remove objects that have finalizers,
1272 * but all the others could still be present in random places on the heap.
1273 * We need a sweep to get rid of them even though it's going to be costly
1275 * The reason we need to remove them is because we access the vtable and class
1276 * structures to know the object size and the reference bitmap: once the domain is
1277 * unloaded the point to random memory.
1280 mono_gc_clear_domain (MonoDomain * domain)
1282 GCMemSection *section;
1284 for (section = section_list; section; section = section->next) {
1285 scan_area_for_domain (domain, section->data, section->end_data);
1287 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
1292 * add_to_global_remset:
1294 * The global remset contains locations in oldspace which point into newspace after
1295 * a minor collection. This can happen if the objects they point to are pinned.
1298 add_to_global_remset (gpointer ptr)
1302 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1305 * FIXME: If an object remains pinned, we need to add it at every minor collection.
1306 * To avoid uncontrolled growth of the global remset, only add each pointer once.
1308 if (global_remset->store_next < global_remset->end_set) {
1309 *(global_remset->store_next++) = (mword)ptr;
1312 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1313 rs->next = global_remset;
1315 *(global_remset->store_next++) = (mword)ptr;
1318 int global_rs_size = 0;
1320 for (rs = global_remset; rs; rs = rs->next) {
1321 global_rs_size += rs->store_next - rs->data;
1323 DEBUG (4, fprintf (gc_debug_file, "Global remset now has size %d\n", global_rs_size));
1328 * This is how the copying happens from the nursery to the old generation.
1329 * We assume that at this time all the pinned objects have been identified and
1331 * We run scan_object() for each pinned object so that each referenced
1332 * objects if possible are copied. The new gray objects created can have
1333 * scan_object() run on them right away, too.
1334 * Then we run copy_object() for the precisely tracked roots. At this point
1335 * all the roots are either gray or black. We run scan_object() on the gray
1336 * objects until no more gray objects are created.
1337 * At the end of the process we walk again the pinned list and we unmark
1338 * the pinned flag. As we go we also create the list of free space for use
1339 * in the next allocation runs.
1341 * We need to remember objects from the old generation that point to the new one
1342 * (or just addresses?).
1344 * copy_object could be made into a macro once debugged (use inline for now).
1347 static char* __attribute__((noinline))
1348 copy_object (char *obj, char *from_space_start, char *from_space_end)
1350 if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) {
1354 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1355 if ((forwarded = object_is_forwarded (obj))) {
1356 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1357 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1360 if (object_is_pinned (obj)) {
1361 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1362 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1365 objsize = safe_object_get_size ((MonoObject*)obj);
1366 objsize += ALLOC_ALIGN - 1;
1367 objsize &= ~(ALLOC_ALIGN - 1);
1368 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1369 /* FIXME: handle pinned allocs:
1370 * Large objects are simple, at least until we always follow the rule:
1371 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1372 * At the end of major collections, we walk the los list and if
1373 * the object is pinned, it is marked, otherwise it can be freed.
1375 if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) {
1376 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize));
1380 /* ok, the object is not pinned, we can move it */
1381 /* use a optimized memcpy here */
1386 char* edi = gray_objects;
1387 __asm__ __volatile__(
1389 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1390 : "0" (objsize/4), "1" (edi),"2" (esi)
1395 memcpy (gray_objects, obj, objsize);
1397 /* adjust array->bounds */
1398 vt = ((MonoObject*)obj)->vtable;
1399 g_assert (vt->gc_descr);
1400 if (vt->rank && ((MonoArray*)obj)->bounds) {
1401 MonoArray *array = (MonoArray*)gray_objects;
1402 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1403 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1405 /* set the forwarding pointer */
1406 forward_object (obj, gray_objects);
1408 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1409 gray_objects += objsize;
1410 DEBUG (8, g_assert (gray_objects <= to_space_end));
1417 #define HANDLE_PTR(ptr,obj) do { \
1419 void *__old = *(ptr); \
1420 *(ptr) = copy_object (*(ptr), from_start, from_end); \
1421 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1422 if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) && !ptr_in_nursery (ptr)) \
1423 add_to_global_remset ((ptr)); \
1428 * Scan the object pointed to by @start for references to
1429 * other objects between @from_start and @from_end and copy
1430 * them to the gray_objects area.
1431 * Returns a pointer to the end of the object.
1434 scan_object (char *start, char* from_start, char* from_end)
1440 vt = (GCVTable*)LOAD_VTABLE (start);
1441 //type = vt->desc & 0x7;
1443 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1445 switch (desc & 0x7) {
1446 //if (type == DESC_TYPE_STRING) {
1447 case DESC_TYPE_STRING:
1448 STRING_SIZE (skip_size, start);
1449 return start + skip_size;
1450 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1451 case DESC_TYPE_RUN_LENGTH:
1452 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1453 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1454 g_assert (skip_size);
1455 return start + skip_size;
1456 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1457 case DESC_TYPE_ARRAY:
1458 case DESC_TYPE_VECTOR:
1459 OBJ_VECTOR_FOREACH_PTR (vt, start);
1460 skip_size = safe_object_get_size ((MonoObject*)start);
1462 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1463 skip_size *= mono_array_length ((MonoArray*)start);
1464 skip_size += sizeof (MonoArray);
1466 skip_size += (ALLOC_ALIGN - 1);
1467 skip_size &= ~(ALLOC_ALIGN - 1);
1468 return start + skip_size;
1469 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1470 case DESC_TYPE_SMALL_BITMAP:
1471 OBJ_BITMAP_FOREACH_PTR (desc,start);
1472 OBJ_BITMAP_SIZE (skip_size, desc, start);
1473 return start + skip_size;
1474 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1475 case DESC_TYPE_LARGE_BITMAP:
1476 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1477 skip_size = safe_object_get_size ((MonoObject*)start);
1478 skip_size += (ALLOC_ALIGN - 1);
1479 skip_size &= ~(ALLOC_ALIGN - 1);
1480 return start + skip_size;
1481 //} else if (type == DESC_TYPE_COMPLEX) {
1482 case DESC_TYPE_COMPLEX:
1483 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1484 /* this is a complex object */
1485 skip_size = safe_object_get_size ((MonoObject*)start);
1486 skip_size += (ALLOC_ALIGN - 1);
1487 skip_size &= ~(ALLOC_ALIGN - 1);
1488 return start + skip_size;
1489 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1490 case DESC_TYPE_COMPLEX_ARR:
1491 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1492 /* this is an array of complex structs */
1493 skip_size = safe_object_get_size ((MonoObject*)start);
1495 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1496 skip_size *= mono_array_length ((MonoArray*)start);
1497 skip_size += sizeof (MonoArray);
1499 skip_size += (ALLOC_ALIGN - 1);
1500 skip_size &= ~(ALLOC_ALIGN - 1);
1501 return start + skip_size;
1503 g_assert_not_reached ();
1510 * Scan the valuetype pointed to by START, described by DESC for references to
1511 * other objects between @from_start and @from_end and copy them to the gray_objects area.
1512 * Returns a pointer to the end of the object.
1515 scan_vtype (char *start, mword desc, char* from_start, char* from_end)
1519 /* The descriptors include info about the MonoObject header as well */
1520 start -= sizeof (MonoObject);
1522 switch (desc & 0x7) {
1523 case DESC_TYPE_RUN_LENGTH:
1524 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1525 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1526 g_assert (skip_size);
1527 return start + skip_size;
1528 case DESC_TYPE_SMALL_BITMAP:
1529 OBJ_BITMAP_FOREACH_PTR (desc,start);
1530 OBJ_BITMAP_SIZE (skip_size, desc, start);
1531 return start + skip_size;
1532 case DESC_TYPE_LARGE_BITMAP:
1533 case DESC_TYPE_COMPLEX:
1535 g_assert_not_reached ();
1538 // The other descriptors can't happen with vtypes
1539 g_assert_not_reached ();
1546 * Addresses from start to end are already sorted. This function finds the object header
1547 * for each address and pins the object. The addresses must be inside the passed section.
1548 * Return the number of pinned objects.
1551 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1556 void *last_obj = NULL;
1557 size_t last_obj_size = 0;
1560 void **definitely_pinned = start;
1561 while (start < end) {
1563 /* the range check should be reduntant */
1564 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1565 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1566 /* multiple pointers to the same object */
1567 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
1571 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1572 search_start = (void*)section->scan_starts [idx];
1573 if (!search_start || search_start > addr) {
1576 search_start = section->scan_starts [idx];
1577 if (search_start && search_start <= addr)
1580 if (!search_start || search_start > addr)
1581 search_start = start_nursery;
1583 if (search_start < last_obj)
1584 search_start = (char*)last_obj + last_obj_size;
1585 /* now addr should be in an object a short distance from search_start
1586 * Note that search_start must point to zeroed mem or point to an object.
1589 if (!*(void**)search_start) {
1590 mword p = (mword)search_start;
1591 p += sizeof (gpointer);
1592 p += ALLOC_ALIGN - 1;
1593 p &= ~(ALLOC_ALIGN - 1);
1594 search_start = (void*)p;
1597 last_obj = search_start;
1598 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1599 last_obj_size += ALLOC_ALIGN - 1;
1600 last_obj_size &= ~(ALLOC_ALIGN - 1);
1601 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size));
1602 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1603 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1604 pin_object (search_start);
1605 definitely_pinned [count] = search_start;
1609 /* skip to the next object */
1610 search_start = (void*)((char*)search_start + last_obj_size);
1611 } while (search_start <= addr);
1612 /* we either pinned the correct object or we ignored the addr because
1613 * it points to unused zeroed memory.
1619 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1623 static void** pin_queue;
1624 static int pin_queue_size = 0;
1625 static int next_pin_slot = 0;
1630 gap = (gap * 10) / 13;
1631 if (gap == 9 || gap == 10)
1640 compare_addr (const void *a, const void *b)
1642 return *(const void **)a - *(const void **)b;
1646 /* sort the addresses in array in increasing order */
1648 sort_addresses (void **array, int size)
1651 * qsort is slower as predicted.
1652 * qsort (array, size, sizeof (gpointer), compare_addr);
1659 gap = new_gap (gap);
1662 for (i = 0; i < end; i++) {
1664 if (array [i] > array [j]) {
1665 void* val = array [i];
1666 array [i] = array [j];
1671 if (gap == 1 && !swapped)
1677 print_nursery_gaps (void* start_nursery, void *end_nursery)
1680 gpointer first = start_nursery;
1682 for (i = 0; i < next_pin_slot; ++i) {
1683 next = pin_queue [i];
1684 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1688 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1691 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
1693 optimize_pin_queue (int start_slot)
1695 void **start, **cur, **end;
1696 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1697 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1698 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1699 if ((next_pin_slot - start_slot) > 1)
1700 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1701 start = cur = pin_queue + start_slot;
1702 end = pin_queue + next_pin_slot;
1705 while (*start == *cur && cur < end)
1709 next_pin_slot = start - pin_queue;
1710 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1711 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
1716 realloc_pin_queue (void)
1718 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1719 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1720 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1721 free_internal_mem (pin_queue);
1722 pin_queue = new_pin;
1723 pin_queue_size = new_size;
1724 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1728 * Scan the memory between start and end and queue values which could be pointers
1729 * to the area between start_nursery and end_nursery for later consideration.
1730 * Typically used for thread stacks.
1733 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1736 while (start < end) {
1737 if (*start >= start_nursery && *start < end_nursery) {
1739 * *start can point to the middle of an object
1740 * note: should we handle pointing at the end of an object?
1741 * pinning in C# code disallows pointing at the end of an object
1742 * but there is some small chance that an optimizing C compiler
1743 * may keep the only reference to an object by pointing
1744 * at the end of it. We ignore this small chance for now.
1745 * Pointers to the end of an object are indistinguishable
1746 * from pointers to the start of the next object in memory
1747 * so if we allow that we'd need to pin two objects...
1748 * We queue the pointer in an array, the
1749 * array will then be sorted and uniqued. This way
1750 * we can coalesce several pinning pointers and it should
1751 * be faster since we'd do a memory scan with increasing
1752 * addresses. Note: we can align the address to the allocation
1753 * alignment, so the unique process is more effective.
1755 mword addr = (mword)*start;
1756 addr &= ~(ALLOC_ALIGN - 1);
1757 if (next_pin_slot >= pin_queue_size)
1758 realloc_pin_queue ();
1759 pin_queue [next_pin_slot++] = (void*)addr;
1760 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1765 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1767 #ifdef HAVE_VALGRIND_MEMCHECK_H
1769 * The pinning addresses might come from undefined memory, this is normal. Since they
1770 * are used in lots of functions, we make the memory defined here instead of having
1771 * to add a supression for those functions.
1773 VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0]));
1778 * If generation is 0, just mark objects in the nursery, the others we don't care,
1779 * since they are not going to move anyway.
1780 * There are different areas that are scanned for pinned pointers:
1781 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1782 * *) the pinned handle table
1783 * *) the pinned roots
1785 * Note: when we'll use a write barrier for old to new gen references, we need to
1786 * keep track of old gen objects that point to pinned new gen objects because in that
1787 * case the referenced object will be moved maybe at the next collection, but there
1788 * is no write in the old generation area where the pinned object is referenced
1789 * and we may not consider it as reachable.
1792 mark_pinned_objects (int generation)
1797 * Debugging function: find in the conservative roots where @obj is being pinned.
1800 find_pinning_reference (char *obj, size_t size)
1804 char *endobj = obj + size;
1805 for (i = 0; i < roots_hash_size; ++i) {
1806 for (root = roots_hash [i]; root; root = root->next) {
1807 /* if desc is non-null it has precise info */
1808 if (!root->root_desc) {
1809 char ** start = (char**)root->start_root;
1810 while (start < (char**)root->end_root) {
1811 if (*start >= obj && *start < endobj) {
1812 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1819 find_pinning_ref_from_thread (obj, size);
1823 * The first thing we do in a collection is to identify pinned objects.
1824 * This function considers all the areas of memory that need to be
1825 * conservatively scanned.
1828 pin_from_roots (void *start_nursery, void *end_nursery)
1832 DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries));
1833 /* objects pinned from the API are inside these roots */
1834 for (i = 0; i < roots_hash_size; ++i) {
1835 for (root = roots_hash [i]; root; root = root->next) {
1836 /* if desc is non-null it has precise info */
1837 if (root->root_desc)
1839 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1840 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1843 /* now deal with the thread stacks
1844 * in the future we should be able to conservatively scan only:
1845 * *) the cpu registers
1846 * *) the unmanaged stack frames
1847 * *) the _last_ managed stack frame
1848 * *) pointers slots in managed frames
1850 pin_thread_data (start_nursery, end_nursery);
1854 * The memory area from start_root to end_root contains pointers to objects.
1855 * Their position is precisely described by @desc (this means that the pointer
1856 * can be either NULL or the pointer to the start of an object).
1857 * This functions copies them to to_space updates them.
1860 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1862 switch (desc & ROOT_DESC_TYPE_MASK) {
1863 case ROOT_DESC_BITMAP:
1864 desc >>= ROOT_DESC_TYPE_SHIFT;
1866 if ((desc & 1) && *start_root) {
1867 *start_root = copy_object (*start_root, n_start, n_end);
1868 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \
1874 case ROOT_DESC_RUN_LEN:
1875 case ROOT_DESC_LARGE_BITMAP:
1876 g_assert_not_reached ();
1881 alloc_fragment (void)
1883 Fragment *frag = fragment_freelist;
1885 fragment_freelist = frag->next;
1889 frag = get_internal_mem (sizeof (Fragment));
1895 * Allocate and setup the data structures needed to be able to allocate objects
1896 * in the nursery. The nursery is stored in nursery_section.
1899 alloc_nursery (void)
1901 GCMemSection *section;
1906 if (nursery_section)
1908 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size));
1909 /* later we will alloc a larger area for the nursery but only activate
1910 * what we need. The rest will be used as expansion if we have too many pinned
1911 * objects in the existing nursery.
1913 /* FIXME: handle OOM */
1914 section = get_internal_mem (sizeof (GCMemSection));
1915 data = get_os_memory (nursery_size, TRUE);
1916 nursery_start = nursery_next = data;
1917 nursery_real_end = data + nursery_size;
1918 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
1919 total_alloc += nursery_size;
1920 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc));
1921 section->data = section->next_data = data;
1922 section->size = nursery_size;
1923 section->end_data = nursery_real_end;
1924 scan_starts = nursery_size / SCAN_START_SIZE;
1925 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
1926 section->num_scan_start = scan_starts;
1927 section->role = MEMORY_ROLE_GEN0;
1929 /* add to the section list */
1930 section->next = section_list;
1931 section_list = section;
1933 nursery_section = section;
1935 /* Setup the single first large fragment */
1936 frag = alloc_fragment ();
1937 frag->fragment_start = nursery_start;
1938 frag->fragment_limit = nursery_start;
1939 frag->fragment_end = nursery_real_end;
1940 nursery_frag_real_end = nursery_real_end;
1941 /* FIXME: frag here is lost */
1945 * Update roots in the old generation. Since we currently don't have the
1946 * info from the write barriers, we just scan all the objects.
1949 scan_old_generation (char *start, char* end)
1951 GCMemSection *section;
1953 LOSObject *big_object;
1956 for (section = section_list; section; section = section->next) {
1957 if (section == nursery_section)
1959 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
1960 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
1962 while (p < section->next_data) {
1967 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
1968 p = scan_object (p, start, end);
1971 /* scan the old object space, too */
1972 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1973 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
1974 scan_object (big_object->data, start, end);
1976 /* scan the list of objects ready for finalization */
1977 for (fin = fin_ready_list; fin; fin = fin->next) {
1978 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
1979 fin->object = copy_object (fin->object, start, end);
1983 static mword fragment_total = 0;
1985 * We found a fragment of free memory in the nursery: memzero it and if
1986 * it is big enough, add it to the list of fragments that can be used for
1990 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
1993 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
1994 /* memsetting just the first chunk start is bound to provide better cache locality */
1995 if (nursery_clear_policy == CLEAR_AT_GC)
1996 memset (frag_start, 0, frag_size);
1997 /* Not worth dealing with smaller fragments: need to tune */
1998 if (frag_size >= FRAGMENT_MIN_SIZE) {
1999 fragment = alloc_fragment ();
2000 fragment->fragment_start = frag_start;
2001 fragment->fragment_limit = frag_start;
2002 fragment->fragment_end = frag_end;
2003 fragment->next = nursery_fragments;
2004 nursery_fragments = fragment;
2005 fragment_total += frag_size;
2007 /* Clear unused fragments, pinning depends on this */
2008 memset (frag_start, 0, frag_size);
2013 scan_needed_big_objects (char *start_addr, char *end_addr)
2015 LOSObject *big_object;
2017 for (big_object = los_object_list; big_object; big_object = big_object->next) {
2018 if (!big_object->scanned && object_is_pinned (big_object->data)) {
2019 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
2020 scan_object (big_object->data, start_addr, end_addr);
2021 big_object->scanned = TRUE;
2029 drain_gray_stack (char *start_addr, char *end_addr)
2033 int fin_ready, bigo_scanned_num;
2037 * We copied all the reachable objects. Now it's the time to copy
2038 * the objects that were not referenced by the roots, but by the copied objects.
2039 * we built a stack of objects pointed to by gray_start: they are
2040 * additional roots and we may add more items as we go.
2041 * We loop until gray_start == gray_objects which means no more objects have
2042 * been added. Note this is iterative: no recursion is involved.
2043 * We need to walk the LO list as well in search of marked big objects
2044 * (use a flag since this is needed only on major collections). We need to loop
2045 * here as well, so keep a counter of marked LO (increasing it in copy_object).
2048 gray_start = to_space;
2049 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2050 while (gray_start < gray_objects) {
2051 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2052 gray_start = scan_object (gray_start, start_addr, end_addr);
2055 DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2056 //scan_old_generation (start_addr, end_addr);
2057 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
2058 /* walk the finalization queue and move also the objects that need to be
2059 * finalized: use the finalized objects as new roots so the objects they depend
2060 * on are also not reclaimed. As with the roots above, only objects in the nursery
2061 * are marked/copied.
2062 * We need a loop here, since objects ready for finalizers may reference other objects
2063 * that are fin-ready. Speedup with a flag?
2066 fin_ready = num_ready_finalizers;
2067 finalize_in_range (start_addr, end_addr);
2068 bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr);
2070 /* drain the new stack that might have been created */
2071 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2072 while (gray_start < gray_objects) {
2073 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2074 gray_start = scan_object (gray_start, start_addr, end_addr);
2076 } while (fin_ready != num_ready_finalizers || bigo_scanned_num);
2078 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
2079 to_space = gray_start;
2080 to_space_section->next_data = to_space;
2083 * handle disappearing links
2084 * Note we do this after checking the finalization queue because if an object
2085 * survives (at least long enough to be finalized) we don't clear the link.
2086 * This also deals with a possible issue with the monitor reclamation: with the Boehm
2087 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
2090 null_link_in_range (start_addr, end_addr);
2092 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2095 static int last_num_pinned = 0;
2098 build_nursery_fragments (int start_pin, int end_pin)
2100 char *frag_start, *frag_end;
2104 /* FIXME: handle non-NULL fragment_freelist */
2105 fragment_freelist = nursery_fragments;
2106 nursery_fragments = NULL;
2107 frag_start = nursery_start;
2109 /* clear scan starts */
2110 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
2111 for (i = start_pin; i < end_pin; ++i) {
2112 frag_end = pin_queue [i];
2113 /* remove the pin bit from pinned objects */
2114 unpin_object (frag_end);
2115 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
2116 frag_size = frag_end - frag_start;
2118 add_nursery_frag (frag_size, frag_start, frag_end);
2119 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2120 frag_size += ALLOC_ALIGN - 1;
2121 frag_size &= ~(ALLOC_ALIGN - 1);
2122 frag_start = (char*)pin_queue [i] + frag_size;
2124 * pin_queue [i] might point to a half-constructed string or vector whose
2125 * length field is not set. In that case, frag_start points inside the
2126 * (zero initialized) object. Find the end of the object by scanning forward.
2129 if (is_maybe_half_constructed (pin_queue [i])) {
2132 /* This is also hit for zero length arrays/strings */
2134 /* Find the end of the TLAB which contained this allocation */
2135 tlab_end = find_tlab_next_from_address (pin_queue [i]);
2138 while ((frag_start < tlab_end) && *(mword*)frag_start == 0)
2139 frag_start += sizeof (mword);
2142 * FIXME: The object is either not allocated in a TLAB, or it isn't a
2143 * half constructed object.
2148 nursery_last_pinned_end = frag_start;
2149 frag_end = nursery_real_end;
2150 frag_size = frag_end - frag_start;
2152 add_nursery_frag (frag_size, frag_start, frag_end);
2153 if (!nursery_fragments) {
2154 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
2155 for (i = start_pin; i < end_pin; ++i) {
2156 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2161 nursery_next = nursery_frag_real_end = NULL;
2163 /* Clear TLABs for all threads */
2167 /* FIXME: later reduce code duplication here with the above
2168 * We don't keep track of section fragments for non-nursery sections yet, so
2172 build_section_fragments (GCMemSection *section)
2175 char *frag_start, *frag_end;
2178 /* clear scan starts */
2179 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
2180 frag_start = section->data;
2181 section->next_data = section->data;
2182 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
2183 frag_end = pin_queue [i];
2184 /* remove the pin bit from pinned objects */
2185 unpin_object (frag_end);
2186 if (frag_end >= section->data + section->size) {
2187 frag_end = section->data + section->size;
2189 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
2191 frag_size = frag_end - frag_start;
2193 memset (frag_start, 0, frag_size);
2194 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2195 frag_size += ALLOC_ALIGN - 1;
2196 frag_size &= ~(ALLOC_ALIGN - 1);
2197 frag_start = (char*)pin_queue [i] + frag_size;
2198 section->next_data = MAX (section->next_data, frag_start);
2200 frag_end = section->end_data;
2201 frag_size = frag_end - frag_start;
2203 memset (frag_start, 0, frag_size);
2207 scan_from_registered_roots (char *addr_start, char *addr_end)
2211 for (i = 0; i < roots_hash_size; ++i) {
2212 for (root = roots_hash [i]; root; root = root->next) {
2213 /* if desc is non-null it has precise info */
2214 if (!root->root_desc)
2216 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2217 precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc);
2223 * Collect objects in the nursery.
2226 collect_nursery (size_t requested_size)
2228 GCMemSection *section;
2229 size_t max_garbage_amount;
2231 char *orig_nursery_next;
2233 TV_DECLARE (all_atv);
2234 TV_DECLARE (all_btv);
2239 orig_nursery_next = nursery_next;
2240 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2241 /* FIXME: optimize later to use the higher address where an object can be present */
2242 nursery_next = MAX (nursery_next, nursery_real_end);
2244 if (consistency_check_at_minor_collection)
2245 check_consistency ();
2247 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
2248 max_garbage_amount = nursery_next - nursery_start;
2250 /* Clear all remaining nursery fragments, pinning depends on this */
2251 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2252 g_assert (orig_nursery_next <= nursery_frag_real_end);
2253 memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next);
2254 for (frag = nursery_fragments; frag; frag = frag->next) {
2255 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2260 * not enough room in the old generation to store all the possible data from
2261 * the nursery in a single continuous space.
2262 * We reset to_space if we allocated objects in degraded mode.
2264 if (to_space_section)
2265 to_space = gray_objects = to_space_section->next_data;
2266 if ((to_space_end - to_space) < max_garbage_amount) {
2267 section = alloc_section (nursery_section->size * 4);
2268 g_assert (nursery_section->size >= max_garbage_amount);
2269 to_space = gray_objects = section->next_data;
2270 to_space_end = section->end_data;
2271 to_space_section = section;
2273 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2274 nursery_section->next_data = nursery_next;
2277 mono_stats.minor_gc_count ++;
2278 /* world must be stopped already */
2279 TV_GETTIME (all_atv);
2281 /* pin from pinned handles */
2282 pin_from_roots (nursery_start, nursery_next);
2283 /* identify pinned objects */
2284 optimize_pin_queue (0);
2285 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2287 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2288 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2291 * walk all the roots and copy the young objects to the old generation,
2292 * starting from to_space
2295 scan_from_remsets (nursery_start, nursery_next);
2296 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2298 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2299 /* FIXME: later scan also alloc_pinned objects */
2301 /* the pinned objects are roots */
2302 for (i = 0; i < next_pin_slot; ++i) {
2303 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2304 scan_object (pin_queue [i], nursery_start, nursery_next);
2306 /* registered roots, this includes static fields */
2307 scan_from_registered_roots (nursery_start, nursery_next);
2308 /* alloc_pinned objects */
2309 scan_from_pinned_objects (nursery_start, nursery_next);
2311 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2313 drain_gray_stack (nursery_start, nursery_next);
2315 /* walk the pin_queue, build up the fragment list of free memory, unmark
2316 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2319 build_nursery_fragments (0, next_pin_slot);
2321 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2323 TV_GETTIME (all_btv);
2324 mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2326 /* prepare the pin queue for the next collection */
2327 last_num_pinned = next_pin_slot;
2329 if (fin_ready_list) {
2330 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2331 mono_gc_finalize_notify ();
2336 major_collection (void)
2338 GCMemSection *section, *prev_section;
2339 LOSObject *bigobj, *prevbo;
2345 TV_DECLARE (all_atv);
2346 TV_DECLARE (all_btv);
2349 /* FIXME: only use these values for the precise scan
2350 * note that to_space pointers should be excluded anyway...
2352 char *heap_start = NULL;
2353 char *heap_end = (char*)-1;
2354 size_t copy_space_required = 0;
2357 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2359 mono_stats.major_gc_count ++;
2361 /* Clear all remaining nursery fragments, pinning depends on this */
2362 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2363 g_assert (nursery_next <= nursery_frag_real_end);
2364 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
2365 for (frag = nursery_fragments; frag; frag = frag->next) {
2366 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2371 * FIXME: implement Mark/Compact
2372 * Until that is done, we can just apply mostly the same alg as for the nursery:
2373 * this means we need a big section to potentially copy all the other sections, so
2374 * it is not ideal specially with large heaps.
2376 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2377 collect_nursery (0);
2380 TV_GETTIME (all_atv);
2381 /* FIXME: make sure the nursery next_data ptr is updated */
2382 nursery_section->next_data = nursery_real_end;
2383 /* we should also coalesce scanning from sections close to each other
2384 * and deal with pointers outside of the sections later.
2386 /* The remsets are not useful for a major collection */
2388 /* world must be stopped already */
2390 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2391 for (section = section_list; section; section = section->next) {
2392 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2393 pin_from_roots (section->data, section->next_data);
2394 if (count != next_pin_slot) {
2396 optimize_pin_queue (count);
2397 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2398 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2399 section->pin_queue_end = next_pin_slot = count + reduced_to;
2401 copy_space_required += (char*)section->next_data - (char*)section->data;
2403 /* identify possible pointers to the insize of large objects */
2404 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2405 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2406 count = next_pin_slot;
2407 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2408 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2409 if (next_pin_slot != count) {
2410 next_pin_slot = count;
2411 pin_object (bigobj->data);
2412 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2415 /* look for pinned addresses for pinned-alloc objects */
2416 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2417 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2418 count = next_pin_slot;
2419 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2420 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2421 if (next_pin_slot != count) {
2422 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2423 next_pin_slot = count;
2428 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2429 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2431 /* allocate the big to space */
2432 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required));
2433 section = alloc_section (copy_space_required);
2434 to_space = gray_objects = section->next_data;
2435 to_space_end = section->end_data;
2436 to_space_section = section;
2438 /* the old generation doesn't need to be scanned (no remembered sets or card
2439 * table needed either): the only objects that must survive are those pinned and
2440 * those referenced by the precise roots.
2441 * mark any section without pinned objects, so we can free it since we will be able to
2442 * move all the objects.
2444 /* the pinned objects are roots (big objects are included in this list, too) */
2445 for (i = 0; i < next_pin_slot; ++i) {
2446 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2447 scan_object (pin_queue [i], heap_start, heap_end);
2449 /* registered roots, this includes static fields */
2450 scan_from_registered_roots (heap_start, heap_end);
2451 /* alloc_pinned objects */
2452 scan_from_pinned_objects (heap_start, heap_end);
2453 /* scan the list of objects ready for finalization */
2454 for (fin = fin_ready_list; fin; fin = fin->next) {
2455 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2456 fin->object = copy_object (fin->object, heap_start, heap_end);
2459 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2461 /* we need to go over the big object list to see if any was marked and scan it
2462 * And we need to make this in a loop, considering that objects referenced by finalizable
2463 * objects could reference big objects (this happens in drain_gray_stack ())
2465 scan_needed_big_objects (heap_start, heap_end);
2466 /* all the objects in the heap */
2467 drain_gray_stack (heap_start, heap_end);
2469 /* sweep the big objects list */
2471 for (bigobj = los_object_list; bigobj;) {
2472 if (object_is_pinned (bigobj->data)) {
2473 unpin_object (bigobj->data);
2474 bigobj->scanned = FALSE;
2477 /* not referenced anywhere, so we can free it */
2479 prevbo->next = bigobj->next;
2481 los_object_list = bigobj->next;
2483 bigobj = bigobj->next;
2484 free_large_object (to_free);
2488 bigobj = bigobj->next;
2490 /* unpin objects from the pinned chunks and free the unmarked ones */
2491 sweep_pinned_objects ();
2493 /* free the unused sections */
2494 prev_section = NULL;
2495 for (section = section_list; section;) {
2496 /* to_space doesn't need handling here and the nursery is special */
2497 if (section == to_space_section || section == nursery_section) {
2498 prev_section = section;
2499 section = section->next;
2502 /* no pinning object, so the section is free */
2503 if (section->pin_queue_start == section->pin_queue_end) {
2504 GCMemSection *to_free;
2506 prev_section->next = section->next;
2508 section_list = section->next;
2510 section = section->next;
2511 free_mem_section (to_free);
2514 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2515 build_section_fragments (section);
2517 prev_section = section;
2518 section = section->next;
2521 /* walk the pin_queue, build up the fragment list of free memory, unmark
2522 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2525 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2527 TV_GETTIME (all_btv);
2528 mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2529 /* prepare the pin queue for the next collection */
2531 if (fin_ready_list) {
2532 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2533 mono_gc_finalize_notify ();
2538 * Allocate a new section of memory to be used as old generation.
2540 static GCMemSection*
2541 alloc_section (size_t size)
2543 GCMemSection *section;
2546 size_t new_size = next_section_size;
2548 if (size > next_section_size) {
2550 new_size += pagesize - 1;
2551 new_size &= ~(pagesize - 1);
2553 section_size_used++;
2554 if (section_size_used > 3) {
2555 section_size_used = 0;
2556 next_section_size *= 2;
2557 if (next_section_size > max_section_size)
2558 next_section_size = max_section_size;
2560 section = get_internal_mem (sizeof (GCMemSection));
2561 data = get_os_memory (new_size, TRUE);
2562 section->data = section->next_data = data;
2563 section->size = new_size;
2564 section->end_data = data + new_size;
2565 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2566 total_alloc += new_size;
2567 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc));
2568 section->data = data;
2569 section->size = new_size;
2570 scan_starts = new_size / SCAN_START_SIZE;
2571 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2572 section->num_scan_start = scan_starts;
2573 section->role = MEMORY_ROLE_GEN1;
2575 /* add to the section list */
2576 section->next = section_list;
2577 section_list = section;
2583 free_mem_section (GCMemSection *section)
2585 char *data = section->data;
2586 size_t size = section->size;
2587 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size));
2588 free_os_memory (data, size);
2589 free_internal_mem (section);
2590 total_alloc -= size;
2594 * When deciding if it's better to collect or to expand, keep track
2595 * of how much garbage was reclaimed with the last collection: if it's too
2597 * This is called when we could not allocate a small object.
2599 static void __attribute__((noinline))
2600 minor_collect_or_expand_inner (size_t size)
2602 int do_minor_collection = 1;
2604 if (!nursery_section) {
2608 if (do_minor_collection) {
2610 collect_nursery (size);
2611 DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage));
2613 /* this also sets the proper pointers for the next allocation */
2614 if (!search_fragment_for_size (size)) {
2616 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2617 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned));
2618 for (i = 0; i < last_num_pinned; ++i) {
2619 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2624 //report_internal_mem_usage ();
2628 * ######################################################################
2629 * ######## Memory allocation from the OS
2630 * ######################################################################
2631 * This section of code deals with getting memory from the OS and
2632 * allocating memory for GC-internal data structures.
2633 * Internal memory can be handled with a freelist for small objects.
2637 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2638 * This must not require any lock.
2641 get_os_memory (size_t size, int activate)
2644 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2646 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
2647 size += pagesize - 1;
2648 size &= ~(pagesize - 1);
2649 ptr = mono_valloc (0, size, prot_flags);
2654 * Free the memory returned by get_os_memory (), returning it to the OS.
2657 free_os_memory (void *addr, size_t size)
2659 munmap (addr, size);
2666 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2668 int i, free_pages, num_free, free_mem;
2670 for (i = 0; i < chunk->num_pages; ++i) {
2671 if (!chunk->page_sizes [i])
2674 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2675 free_mem = FREELIST_PAGESIZE * free_pages;
2676 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2677 if (!chunk->free_list [i])
2680 p = chunk->free_list [i];
2685 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2686 free_mem += freelist_sizes [i] * num_free;
2688 printf ("\tfree memory in chunk: %d\n", free_mem);
2695 report_internal_mem_usage (void) {
2698 printf ("Internal memory usage:\n");
2700 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2701 report_pinned_chunk (chunk, i++);
2703 printf ("Pinned memory usage:\n");
2705 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2706 report_pinned_chunk (chunk, i++);
2711 * the array of pointers from @start to @end contains conservative
2712 * pointers to objects inside @chunk: mark each referenced object
2716 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2718 for (; start < end; start++) {
2719 char *addr = *start;
2720 int offset = (char*)addr - (char*)chunk;
2721 int page = offset / FREELIST_PAGESIZE;
2722 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2723 int slot_size = chunk->page_sizes [page];
2725 /* the page is not allocated */
2728 /* would be faster if we restrict the sizes to power of two,
2729 * but that's a waste of memory: need to measure. it could reduce
2730 * fragmentation since there are less pages needed, if for example
2731 * someone interns strings of each size we end up with one page per
2732 * interned string (still this is just ~40 KB): with more fine-grained sizes
2733 * this increases the number of used pages.
2736 obj_offset /= slot_size;
2737 obj_offset *= slot_size;
2738 addr = (char*)chunk->start_data + obj_offset;
2740 obj_offset /= slot_size;
2741 obj_offset *= slot_size;
2742 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2745 /* if the vtable is inside the chunk it's on the freelist, so skip */
2746 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2748 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
2754 sweep_pinned_objects (void)
2761 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2762 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2763 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2764 for (i = 0; i < chunk->num_pages; ++i) {
2765 obj_size = chunk->page_sizes [i];
2768 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2769 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2770 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2771 while (p + obj_size <= endp) {
2773 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2774 /* if the first word (the vtable) is outside the chunk we have an object */
2775 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2776 if (object_is_pinned (ptr)) {
2778 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2780 /* FIXME: add to freelist */
2781 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2791 scan_from_pinned_objects (char *addr_start, char *addr_end)
2798 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2799 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2800 DEBUG (6, fprintf (gc_debug_file, "Scanning pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2801 for (i = 0; i < chunk->num_pages; ++i) {
2802 obj_size = chunk->page_sizes [i];
2805 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2806 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2807 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2808 while (p + obj_size <= endp) {
2810 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2811 /* if the first word (the vtable) is outside the chunk we have an object */
2812 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2813 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of alloc_pinned %p (%s)\n", i, ptr, safe_name (ptr)));
2814 // FIXME: Put objects without references into separate chunks
2815 // which do not need to be scanned
2816 scan_object ((char*)ptr, addr_start, addr_end);
2825 * Find the slot number in the freelist for memory chunks that
2826 * can contain @size objects.
2829 slot_for_size (size_t size)
2832 /* do a binary search or lookup table later. */
2833 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2834 if (freelist_sizes [slot] >= size)
2837 g_assert_not_reached ();
2842 * Build a free list for @size memory chunks from the memory area between
2843 * start_page and end_page.
2846 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
2850 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
2851 p = (void**)start_page;
2852 end = (void**)(end_page - size);
2853 g_assert (!chunk->free_list [slot]);
2854 chunk->free_list [slot] = p;
2855 while ((char*)p + size <= (char*)end) {
2857 *p = (void*)((char*)p + size);
2861 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
2865 alloc_pinned_chunk (size_t size)
2870 size += pagesize; /* at least one page */
2871 size += pagesize - 1;
2872 size &= ~(pagesize - 1);
2873 if (size < PINNED_CHUNK_MIN_SIZE * 2)
2874 size = PINNED_CHUNK_MIN_SIZE * 2;
2875 chunk = get_os_memory (size, TRUE);
2876 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
2877 total_alloc += size;
2879 /* setup the bookeeping fields */
2880 chunk->num_pages = size / FREELIST_PAGESIZE;
2881 offset = G_STRUCT_OFFSET (PinnedChunk, data);
2882 chunk->page_sizes = (void*)((char*)chunk + offset);
2883 offset += sizeof (int) * chunk->num_pages;
2884 offset += ALLOC_ALIGN - 1;
2885 offset &= ~(ALLOC_ALIGN - 1);
2886 chunk->free_list = (void*)((char*)chunk + offset);
2887 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
2888 offset += ALLOC_ALIGN - 1;
2889 offset &= ~(ALLOC_ALIGN - 1);
2890 chunk->start_data = (void*)((char*)chunk + offset);
2892 /* allocate the first page to the freelist */
2893 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
2894 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
2895 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size));
2896 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
2897 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
2901 /* assumes freelist for slot is empty, so try to alloc a new page */
2903 get_chunk_freelist (PinnedChunk *chunk, int slot)
2907 p = chunk->free_list [slot];
2909 chunk->free_list [slot] = *p;
2912 for (i = 0; i < chunk->num_pages; ++i) {
2914 if (chunk->page_sizes [i])
2916 size = freelist_sizes [slot];
2917 chunk->page_sizes [i] = size;
2918 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
2922 p = chunk->free_list [slot];
2924 chunk->free_list [slot] = *p;
2931 alloc_from_freelist (size_t size)
2935 PinnedChunk *pchunk;
2936 slot = slot_for_size (size);
2937 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
2938 g_assert (size <= freelist_sizes [slot]);
2939 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2940 void **p = pchunk->free_list [slot];
2942 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
2943 pchunk->free_list [slot] = *p;
2947 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2948 res = get_chunk_freelist (pchunk, slot);
2952 pchunk = alloc_pinned_chunk (size);
2953 /* FIXME: handle OOM */
2954 pchunk->next = pinned_chunk_list;
2955 pinned_chunk_list = pchunk;
2956 res = get_chunk_freelist (pchunk, slot);
2960 /* used for the GC-internal data structures */
2961 /* FIXME: add support for bigger sizes by allocating more than one page
2965 get_internal_mem (size_t size)
2967 return calloc (1, size);
2971 PinnedChunk *pchunk;
2972 slot = slot_for_size (size);
2973 g_assert (size <= freelist_sizes [slot]);
2974 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2975 void **p = pchunk->free_list [slot];
2977 pchunk->free_list [slot] = *p;
2981 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2982 res = get_chunk_freelist (pchunk, slot);
2986 pchunk = alloc_pinned_chunk (size);
2987 /* FIXME: handle OOM */
2988 pchunk->next = internal_chunk_list;
2989 internal_chunk_list = pchunk;
2990 res = get_chunk_freelist (pchunk, slot);
2996 free_internal_mem (void *addr)
3000 PinnedChunk *pchunk;
3001 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
3002 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
3003 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
3004 int offset = (char*)addr - (char*)pchunk;
3005 int page = offset / FREELIST_PAGESIZE;
3006 int slot = slot_for_size (pchunk->page_sizes [page]);
3008 *p = pchunk->free_list [slot];
3009 pchunk->free_list [slot] = p;
3013 printf ("free of %p failed\n", addr);
3014 g_assert_not_reached ();
3019 * ######################################################################
3020 * ######## Object allocation
3021 * ######################################################################
3022 * This section of code deals with allocating memory for objects.
3023 * There are several ways:
3024 * *) allocate large objects
3025 * *) allocate normal objects
3026 * *) fast lock-free allocation
3027 * *) allocation of pinned objects
3031 free_large_object (LOSObject *obj)
3033 size_t size = obj->size;
3034 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size));
3036 los_memory_usage -= size;
3037 size += sizeof (LOSObject);
3038 size += pagesize - 1;
3039 size &= ~(pagesize - 1);
3040 total_alloc -= size;
3042 free_os_memory (obj, size);
3046 * Objects with size >= 64KB are allocated in the large object space.
3047 * They are currently kept track of with a linked list.
3048 * They don't move, so there is no need to pin them during collection
3049 * and we avoid the memcpy overhead.
3051 static void* __attribute__((noinline))
3052 alloc_large_inner (MonoVTable *vtable, size_t size)
3057 int just_did_major_gc = FALSE;
3059 if (los_memory_usage > next_los_collection) {
3060 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection));
3061 just_did_major_gc = TRUE;
3063 major_collection ();
3065 /* later increase based on a percent of the heap size */
3066 next_los_collection = los_memory_usage + 5*1024*1024;
3069 alloc_size += sizeof (LOSObject);
3070 alloc_size += pagesize - 1;
3071 alloc_size &= ~(pagesize - 1);
3072 /* FIXME: handle OOM */
3073 obj = get_os_memory (alloc_size, TRUE);
3075 vtslot = (void**)obj->data;
3077 total_alloc += alloc_size;
3078 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
3079 obj->next = los_object_list;
3080 los_object_list = obj;
3081 los_memory_usage += size;
3083 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size));
3087 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
3088 * an object of size @size
3089 * Return FALSE if not found (which means we need a collection)
3092 search_fragment_for_size (size_t size)
3094 Fragment *frag, *prev;
3095 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size));
3097 if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3098 /* Clear the remaining space, pinning depends on this */
3099 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
3102 for (frag = nursery_fragments; frag; frag = frag->next) {
3103 if (size <= (frag->fragment_end - frag->fragment_start)) {
3104 /* remove from the list */
3106 prev->next = frag->next;
3108 nursery_fragments = frag->next;
3109 nursery_next = frag->fragment_start;
3110 nursery_frag_real_end = frag->fragment_end;
3112 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
3113 frag->next = fragment_freelist;
3114 fragment_freelist = frag;
3123 * size is already rounded up and we hold the GC lock.
3126 alloc_degraded (MonoVTable *vtable, size_t size)
3128 GCMemSection *section;
3130 for (section = section_list; section; section = section->next) {
3131 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
3132 p = (void**)section->next_data;
3137 section = alloc_section (nursery_section->size * 4);
3138 /* FIXME: handle OOM */
3139 p = (void**)section->next_data;
3141 section->next_data += size;
3142 degraded_mode += size;
3143 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
3149 * Provide a variant that takes just the vtable for small fixed-size objects.
3150 * The aligned size is already computed and stored in vt->gc_descr.
3151 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
3152 * processing. We can keep track of where objects start, for example,
3153 * so when we scan the thread stacks for pinned objects, we can start
3154 * a search for the pinned object in SCAN_START_SIZE chunks.
3157 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
3159 /* FIXME: handle OOM */
3164 size += ALLOC_ALIGN - 1;
3165 size &= ~(ALLOC_ALIGN - 1);
3167 g_assert (vtable->gc_descr);
3169 if (G_UNLIKELY (collect_before_allocs)) {
3172 if (nursery_section) {
3175 update_current_thread_stack (&dummy);
3177 collect_nursery (0);
3179 if (!degraded_mode && !search_fragment_for_size (size)) {
3181 g_assert_not_reached ();
3187 /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */
3189 p = (void**)tlab_next;
3190 /* FIXME: handle overflow */
3191 new_next = (char*)p + size;
3192 tlab_next = new_next;
3194 if (G_LIKELY (new_next < tlab_temp_end)) {
3198 * FIXME: We might need a memory barrier here so the change to tlab_next is
3199 * visible before the vtable store.
3202 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3210 /* there are two cases: the object is too big or we run out of space in the TLAB */
3211 /* we also reach here when the thread does its first allocation after a minor
3212 * collection, since the tlab_ variables are initialized to NULL.
3213 * there can be another case (from ORP), if we cooperate with the runtime a bit:
3214 * objects that need finalizers can have the high bit set in their size
3215 * so the above check fails and we can readily add the object to the queue.
3216 * This avoids taking again the GC lock when registering, but this is moot when
3217 * doing thread-local allocation, so it may not be a good idea.
3220 if (size > MAX_SMALL_OBJ_SIZE) {
3221 /* get ready for possible collection */
3222 update_current_thread_stack (&dummy);
3224 p = alloc_large_inner (vtable, size);
3226 if (tlab_next >= tlab_real_end) {
3228 * Run out of space in the TLAB. When this happens, some amount of space
3229 * remains in the TLAB, but not enough to satisfy the current allocation
3230 * request. Currently, we retire the TLAB in all cases, later we could
3231 * keep it if the remaining space is above a treshold, and satisfy the
3232 * allocation directly from the nursery.
3235 /* when running in degraded mode, we continue allocing that way
3236 * for a while, to decrease the number of useless nursery collections.
3238 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
3239 p = alloc_degraded (vtable, size);
3244 if (size > tlab_size) {
3245 /* Allocate directly from the nursery */
3246 if (nursery_next + size >= nursery_frag_real_end) {
3247 if (!search_fragment_for_size (size)) {
3248 /* get ready for possible collection */
3249 update_current_thread_stack (&dummy);
3250 minor_collect_or_expand_inner (size);
3251 if (degraded_mode) {
3252 p = alloc_degraded (vtable, size);
3259 p = (void*)nursery_next;
3260 nursery_next += size;
3261 if (nursery_next > nursery_frag_real_end) {
3266 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3267 memset (p, 0, size);
3270 DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size)));
3272 if (nursery_next + tlab_size >= nursery_frag_real_end) {
3273 res = search_fragment_for_size (tlab_size);
3275 /* get ready for possible collection */
3276 update_current_thread_stack (&dummy);
3277 minor_collect_or_expand_inner (tlab_size);
3278 if (degraded_mode) {
3279 p = alloc_degraded (vtable, size);
3286 /* Allocate a new TLAB from the current nursery fragment */
3287 tlab_start = nursery_next;
3288 nursery_next += tlab_size;
3289 tlab_next = tlab_start;
3290 tlab_real_end = tlab_start + tlab_size;
3291 tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size);
3293 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3294 memset (tlab_start, 0, tlab_size);
3296 /* Allocate from the TLAB */
3297 p = (void*)tlab_next;
3299 g_assert (tlab_next <= tlab_real_end);
3301 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3304 /* Reached tlab_temp_end */
3306 /* record the scan start so we can find pinned objects more easily */
3307 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3308 /* we just bump tlab_temp_end as well */
3309 tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE);
3310 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end));
3314 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3323 * To be used for interned strings and possibly MonoThread, reflection handles.
3324 * We may want to explicitly free these objects.
3327 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
3329 /* FIXME: handle OOM */
3331 size += ALLOC_ALIGN - 1;
3332 size &= ~(ALLOC_ALIGN - 1);
3334 if (size > MAX_FREELIST_SIZE) {
3335 update_current_thread_stack (&p);
3336 /* large objects are always pinned anyway */
3337 p = alloc_large_inner (vtable, size);
3339 p = alloc_from_freelist (size);
3340 memset (p, 0, size);
3342 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3349 * ######################################################################
3350 * ######## Finalization support
3351 * ######################################################################
3355 * this is valid for the nursery: if the object has been forwarded it means it's
3356 * still refrenced from a root. If it is pinned it's still alive as well.
3357 * Return TRUE if @obj is ready to be finalized.
3359 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
3362 finalize_in_range (char *start, char *end)
3364 FinalizeEntry *entry, *prev;
3368 for (i = 0; i < finalizable_hash_size; ++i) {
3370 for (entry = finalizable_hash [i]; entry;) {
3371 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3372 if (object_is_fin_ready (entry->object)) {
3374 FinalizeEntry *next;
3375 /* remove and put in fin_ready_list */
3377 prev->next = entry->next;
3379 finalizable_hash [i] = entry->next;
3381 num_ready_finalizers++;
3382 num_registered_finalizers--;
3383 entry->next = fin_ready_list;
3384 fin_ready_list = entry;
3385 /* Make it survive */
3386 from = entry->object;
3387 entry->object = copy_object (entry->object, start, end);
3388 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3392 /* update pointer */
3393 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3394 entry->object = copy_object (entry->object, start, end);
3398 entry = entry->next;
3404 null_link_in_range (char *start, char *end)
3406 FinalizeEntry *entry, *prev;
3408 for (i = 0; i < disappearing_link_hash_size; ++i) {
3410 for (entry = disappearing_link_hash [i]; entry;) {
3411 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3412 if (object_is_fin_ready (entry->object)) {
3413 void **p = entry->data;
3416 /* remove from list */
3418 prev->next = entry->next;
3420 disappearing_link_hash [i] = entry->next;
3421 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, entry->object));
3423 free_internal_mem (entry);
3425 num_disappearing_links--;
3429 /* update pointer if it's moved
3430 * FIXME: what if an object is moved earlier?
3432 entry->object = copy_object (entry->object, start, end);
3433 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->data, entry->object));
3435 *link = entry->object;
3439 entry = entry->next;
3445 * mono_gc_finalizers_for_domain:
3446 * @domain: the unloading appdomain
3447 * @out_array: output array
3448 * @out_size: size of output array
3450 * Store inside @out_array up to @out_size objects that belong to the unloading
3451 * appdomain @domain. Returns the number of stored items. Can be called repeteadly
3452 * until it returns 0.
3453 * The items are removed from the finalizer data structure, so the caller is supposed
3455 * @out_array should be on the stack to allow the GC to know the objects are still alive.
3458 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3460 FinalizeEntry *entry, *prev;
3462 if (no_finalize || !out_size || !out_array)
3466 for (i = 0; i < finalizable_hash_size; ++i) {
3468 for (entry = finalizable_hash [i]; entry;) {
3469 if (mono_object_domain (entry->object) == domain) {
3470 FinalizeEntry *next;
3471 /* remove and put in out_array */
3473 prev->next = entry->next;
3475 finalizable_hash [i] = entry->next;
3477 num_registered_finalizers--;
3478 out_array [count ++] = entry->object;
3479 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
3481 if (count == out_size) {
3488 entry = entry->next;
3496 rehash_fin_table (void)
3500 FinalizeEntry **new_hash;
3501 FinalizeEntry *entry, *next;
3502 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3504 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3505 for (i = 0; i < finalizable_hash_size; ++i) {
3506 for (entry = finalizable_hash [i]; entry; entry = next) {
3507 hash = mono_object_hash (entry->object) % new_size;
3509 entry->next = new_hash [hash];
3510 new_hash [hash] = entry;
3513 free_internal_mem (finalizable_hash);
3514 finalizable_hash = new_hash;
3515 finalizable_hash_size = new_size;
3519 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3521 FinalizeEntry *entry, *prev;
3525 hash = mono_object_hash (obj);
3527 if (num_registered_finalizers >= finalizable_hash_size * 2)
3528 rehash_fin_table ();
3529 hash %= finalizable_hash_size;
3531 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3532 if (entry->object == obj) {
3534 entry->data = user_data;
3536 /* remove from the list */
3538 prev->next = entry->next;
3540 finalizable_hash [hash] = entry->next;
3541 num_registered_finalizers--;
3542 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3543 free_internal_mem (entry);
3551 /* request to deregister, but already out of the list */
3555 entry = get_internal_mem (sizeof (FinalizeEntry));
3556 entry->object = obj;
3557 entry->data = user_data;
3558 entry->next = finalizable_hash [hash];
3559 finalizable_hash [hash] = entry;
3560 num_registered_finalizers++;
3561 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3566 rehash_dislink (void)
3570 FinalizeEntry **new_hash;
3571 FinalizeEntry *entry, *next;
3572 int new_size = g_spaced_primes_closest (num_disappearing_links);
3574 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3575 for (i = 0; i < disappearing_link_hash_size; ++i) {
3576 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3577 hash = mono_aligned_addr_hash (entry->data) % new_size;
3579 entry->next = new_hash [hash];
3580 new_hash [hash] = entry;
3583 free_internal_mem (disappearing_link_hash);
3584 disappearing_link_hash = new_hash;
3585 disappearing_link_hash_size = new_size;
3589 mono_gc_register_disappearing_link (MonoObject *obj, void *link)
3591 FinalizeEntry *entry, *prev;
3595 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3597 /* FIXME: add check that link is not in the heap */
3598 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3599 entry = disappearing_link_hash [hash];
3601 for (; entry; entry = entry->next) {
3602 /* link already added */
3603 if (link == entry->data) {
3604 /* NULL obj means remove */
3607 prev->next = entry->next;
3609 disappearing_link_hash [hash] = entry->next;
3610 num_disappearing_links--;
3611 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3612 free_internal_mem (entry);
3614 entry->object = obj; /* we allow the change of object */
3621 entry = get_internal_mem (sizeof (FinalizeEntry));
3622 entry->object = obj;
3624 entry->next = disappearing_link_hash [hash];
3625 disappearing_link_hash [hash] = entry;
3626 num_disappearing_links++;
3627 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
3632 mono_gc_invoke_finalizers (void)
3634 FinalizeEntry *entry;
3637 /* FIXME: batch to reduce lock contention */
3638 while (fin_ready_list) {
3640 entry = fin_ready_list;
3642 fin_ready_list = entry->next;
3643 num_ready_finalizers--;
3644 obj = entry->object;
3645 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
3649 void (*callback)(void *, void*) = entry->data;
3651 obj = entry->object;
3653 /* the object is on the stack so it is pinned */
3654 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3655 callback (obj, NULL);
3656 free_internal_mem (entry);
3663 mono_gc_pending_finalizers (void)
3665 return fin_ready_list != NULL;
3668 /* Negative value to remove */
3670 mono_gc_add_memory_pressure (gint64 value)
3672 /* FIXME: Use interlocked functions */
3674 memory_pressure += value;
3679 * ######################################################################
3680 * ######## registered roots support
3681 * ######################################################################
3689 RootRecord **new_hash;
3690 RootRecord *entry, *next;
3691 int new_size = g_spaced_primes_closest (num_roots_entries);
3693 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3694 for (i = 0; i < roots_hash_size; ++i) {
3695 for (entry = roots_hash [i]; entry; entry = next) {
3696 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3698 entry->next = new_hash [hash];
3699 new_hash [hash] = entry;
3702 free_internal_mem (roots_hash);
3703 roots_hash = new_hash;
3704 roots_hash_size = new_size;
3708 * We do not coalesce roots.
3711 mono_gc_register_root (char *start, size_t size, void *descr)
3713 RootRecord *new_root;
3714 unsigned int hash = mono_aligned_addr_hash (start);
3716 if (num_roots_entries >= roots_hash_size * 2)
3718 hash %= roots_hash_size;
3719 for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) {
3720 /* we allow changing the size and the descriptor (for thread statics etc) */
3721 if (new_root->start_root == start) {
3722 size_t old_size = new_root->end_root - new_root->start_root;
3723 new_root->end_root = new_root->start_root + size;
3724 new_root->root_desc = (mword)descr;
3726 roots_size -= old_size;
3731 new_root = get_internal_mem (sizeof (RootRecord));
3733 new_root->start_root = start;
3734 new_root->end_root = new_root->start_root + size;
3735 new_root->root_desc = (mword)descr;
3737 num_roots_entries++;
3738 new_root->next = roots_hash [hash];
3739 roots_hash [hash] = new_root;
3740 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
3750 mono_gc_deregister_root (char* addr)
3752 RootRecord *tmp, *prev = NULL;
3753 unsigned int hash = mono_aligned_addr_hash (addr);
3755 hash %= roots_hash_size;
3756 tmp = roots_hash [hash];
3758 if (tmp->start_root == (char*)addr) {
3760 prev->next = tmp->next;
3762 roots_hash [hash] = tmp->next;
3763 roots_size -= (tmp->end_root - tmp->start_root);
3764 num_roots_entries--;
3765 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
3766 free_internal_mem (tmp);
3776 * ######################################################################
3777 * ######## Thread handling (stop/start code)
3778 * ######################################################################
3781 /* eventually share with MonoThread? */
3782 typedef struct _SgenThreadInfo SgenThreadInfo;
3784 struct _SgenThreadInfo {
3785 SgenThreadInfo *next;
3786 ARCH_THREAD_TYPE id;
3787 unsigned int stop_count; /* to catch duplicate signals */
3792 char **tlab_next_addr;
3793 char **tlab_start_addr;
3794 char **tlab_temp_end_addr;
3795 char **tlab_real_end_addr;
3796 RememberedSet *remset;
3799 /* FIXME: handle large/small config */
3800 #define THREAD_HASH_SIZE 11
3801 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
3803 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
3805 #if USE_SIGNAL_BASED_START_STOP_WORLD
3807 static sem_t suspend_ack_semaphore;
3808 static unsigned int global_stop_count = 0;
3809 static int suspend_signal_num = SIGPWR;
3810 static int restart_signal_num = SIGXCPU;
3811 static sigset_t suspend_signal_mask;
3812 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
3814 /* LOCKING: assumes the GC lock is held */
3815 static SgenThreadInfo*
3816 thread_info_lookup (ARCH_THREAD_TYPE id)
3818 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3819 SgenThreadInfo *info;
3821 info = thread_table [hash];
3822 while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
3829 update_current_thread_stack (void *start)
3831 void *ptr = cur_thread_regs;
3832 SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
3833 info->stack_start = align_pointer (&ptr);
3834 ARCH_STORE_REGS (ptr);
3838 signal_desc (int signum)
3840 if (signum == suspend_signal_num)
3842 if (signum == restart_signal_num)
3847 /* LOCKING: assumes the GC lock is held */
3849 thread_handshake (int signum)
3851 int count, i, result;
3852 SgenThreadInfo *info;
3853 pthread_t me = pthread_self ();
3856 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3857 for (info = thread_table [i]; info; info = info->next) {
3858 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
3859 if (ARCH_THREAD_EQUALS (info->id, me)) {
3860 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
3863 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
3865 result = pthread_kill (info->id, signum);
3867 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
3870 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
3876 for (i = 0; i < count; ++i) {
3877 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
3878 if (errno != EINTR) {
3879 g_error ("sem_wait ()");
3886 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
3888 suspend_handler (int sig)
3890 SgenThreadInfo *info;
3893 int old_errno = errno;
3895 id = pthread_self ();
3896 info = thread_info_lookup (id);
3897 stop_count = global_stop_count;
3898 /* duplicate signal */
3899 if (0 && info->stop_count == stop_count) {
3903 /* update the remset info in the thread data structure */
3904 info->remset = remembered_set;
3906 * this includes the register values that the kernel put on the stack.
3907 * Write arch-specific code to only push integer regs and a more accurate
3910 info->stack_start = align_pointer (&id);
3912 /* notify the waiting thread */
3913 sem_post (&suspend_ack_semaphore);
3914 info->stop_count = stop_count;
3916 /* wait until we receive the restart signal */
3919 sigsuspend (&suspend_signal_mask);
3920 } while (info->signal != restart_signal_num);
3922 /* notify the waiting thread */
3923 sem_post (&suspend_ack_semaphore);
3929 restart_handler (int sig)
3931 SgenThreadInfo *info;
3932 int old_errno = errno;
3934 info = thread_info_lookup (pthread_self ());
3935 info->signal = restart_signal_num;
3940 static TV_DECLARE (stop_world_time);
3941 static unsigned long max_pause_usec = 0;
3943 /* LOCKING: assumes the GC lock is held */
3949 global_stop_count++;
3950 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
3951 TV_GETTIME (stop_world_time);
3952 count = thread_handshake (suspend_signal_num);
3953 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
3957 /* LOCKING: assumes the GC lock is held */
3959 restart_world (void)
3962 TV_DECLARE (end_sw);
3965 count = thread_handshake (restart_signal_num);
3966 TV_GETTIME (end_sw);
3967 usec = TV_ELAPSED (stop_world_time, end_sw);
3968 max_pause_usec = MAX (usec, max_pause_usec);
3969 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
3973 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
3976 * Identify objects pinned in a thread stack and its registers.
3979 pin_thread_data (void *start_nursery, void *end_nursery)
3982 SgenThreadInfo *info;
3984 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3985 for (info = thread_table [i]; info; info = info->next) {
3987 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3990 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3991 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
3994 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n"));
3995 conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery);
3999 find_pinning_ref_from_thread (char *obj, size_t size)
4002 SgenThreadInfo *info;
4003 char *endobj = obj + size;
4005 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4006 for (info = thread_table [i]; info; info = info->next) {
4007 char **start = (char**)info->stack_start;
4010 while (start < (char**)info->stack_end) {
4011 if (*start >= obj && *start < endobj) {
4012 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end));
4018 /* FIXME: check register */
4021 /* return TRUE if ptr points inside the managed heap */
4023 ptr_in_heap (void* ptr)
4025 mword p = (mword)ptr;
4026 if (p < lowest_heap_address || p >= highest_heap_address)
4028 /* FIXME: more checks */
4033 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
4039 /* FIXME: exclude stack locations */
4040 switch ((*p) & REMSET_TYPE_MASK) {
4041 case REMSET_LOCATION:
4043 if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) {
4044 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4045 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
4046 if (!global && *ptr >= start_nursery && *ptr < end_nursery) {
4048 * If the object is pinned, each reference to it from nonpinned objects
4049 * becomes part of the global remset, which can grow very large.
4051 DEBUG (9, fprintf (gc_debug_file, "Add to global remset because of pinning %p (%p %s)\n", ptr, *ptr, safe_name (*ptr)));
4052 add_to_global_remset (ptr);
4055 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
4059 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4060 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4063 while (count-- > 0) {
4064 *ptr = copy_object (*ptr, start_nursery, end_nursery);
4065 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
4066 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
4067 add_to_global_remset (ptr);
4072 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4073 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4075 scan_object (*ptr, start_nursery, end_nursery);
4078 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4079 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4082 scan_vtype ((char*)ptr, desc, start_nursery, end_nursery);
4085 g_assert_not_reached ();
4091 scan_from_remsets (void *start_nursery, void *end_nursery)
4094 SgenThreadInfo *info;
4095 RememberedSet *remset, *next;
4096 mword *p, *next_p, *store_pos;
4098 /* the global one */
4099 for (remset = global_remset; remset; remset = remset->next) {
4100 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4101 store_pos = remset->data;
4102 for (p = remset->data; p < remset->store_next; p = next_p) {
4105 next_p = handle_remset (p, start_nursery, end_nursery, TRUE);
4108 * Clear global remsets of locations which no longer point to the
4109 * nursery. Otherwise, they could grow indefinitely between major
4113 g_assert ((ptr & REMSET_TYPE_MASK) == REMSET_LOCATION);
4114 if (ptr_in_nursery (*(void**)ptr))
4118 /* Truncate the remset */
4119 remset->store_next = store_pos;
4122 /* the per-thread ones */
4123 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4124 for (info = thread_table [i]; info; info = info->next) {
4125 for (remset = info->remset; remset; remset = next) {
4126 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4127 for (p = remset->data; p < remset->store_next;) {
4128 p = handle_remset (p, start_nursery, end_nursery, FALSE);
4130 remset->store_next = remset->data;
4131 next = remset->next;
4132 remset->next = NULL;
4133 if (remset != info->remset) {
4134 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4135 free_internal_mem (remset);
4143 * Clear the info in the remembered sets: we're doing a major collection, so
4144 * the per-thread ones are not needed and the global ones will be reconstructed
4148 clear_remsets (void)
4151 SgenThreadInfo *info;
4152 RememberedSet *remset, *next;
4154 /* the global list */
4155 for (remset = global_remset; remset; remset = next) {
4156 remset->store_next = remset->data;
4157 next = remset->next;
4158 remset->next = NULL;
4159 if (remset != global_remset) {
4160 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4161 free_internal_mem (remset);
4164 /* the per-thread ones */
4165 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4166 for (info = thread_table [i]; info; info = info->next) {
4167 for (remset = info->remset; remset; remset = next) {
4168 remset->store_next = remset->data;
4169 next = remset->next;
4170 remset->next = NULL;
4171 if (remset != info->remset) {
4172 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4173 free_internal_mem (remset);
4181 * Clear the thread local TLAB variables for all threads.
4186 SgenThreadInfo *info;
4189 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4190 for (info = thread_table [i]; info; info = info->next) {
4191 /* A new TLAB will be allocated when the thread does its first allocation */
4192 *info->tlab_start_addr = NULL;
4193 *info->tlab_next_addr = NULL;
4194 *info->tlab_temp_end_addr = NULL;
4195 *info->tlab_real_end_addr = NULL;
4201 * Find the tlab_next value of the TLAB which contains ADDR.
4204 find_tlab_next_from_address (char *addr)
4206 SgenThreadInfo *info;
4209 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4210 for (info = thread_table [i]; info; info = info->next) {
4211 if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr)
4212 return *info->tlab_next_addr;
4219 /* LOCKING: assumes the GC lock is held */
4220 static SgenThreadInfo*
4221 gc_register_current_thread (void *addr)
4224 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
4227 info->id = ARCH_GET_THREAD ();
4228 info->stop_count = -1;
4231 info->stack_start = NULL;
4232 info->tlab_start_addr = &tlab_start;
4233 info->tlab_next_addr = &tlab_next;
4234 info->tlab_temp_end_addr = &tlab_temp_end;
4235 info->tlab_real_end_addr = &tlab_real_end;
4237 tlab_next_addr = &tlab_next;
4239 /* try to get it with attributes first */
4240 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
4244 pthread_attr_t attr;
4245 pthread_getattr_np (pthread_self (), &attr);
4246 pthread_attr_getstack (&attr, &sstart, &size);
4247 info->stack_end = (char*)sstart + size;
4248 pthread_attr_destroy (&attr);
4250 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
4251 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
4254 /* FIXME: we assume the stack grows down */
4255 gsize stack_bottom = (gsize)addr;
4256 stack_bottom += 4095;
4257 stack_bottom &= ~4095;
4258 info->stack_end = (char*)stack_bottom;
4262 /* hash into the table */
4263 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
4264 info->next = thread_table [hash];
4265 thread_table [hash] = info;
4267 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
4268 pthread_setspecific (remembered_set_key, remembered_set);
4269 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
4274 unregister_current_thread (void)
4277 SgenThreadInfo *prev = NULL;
4279 RememberedSet *rset;
4280 ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
4282 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4283 p = thread_table [hash];
4285 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
4286 while (!ARCH_THREAD_EQUALS (p->id, id)) {
4291 thread_table [hash] = p->next;
4293 prev->next = p->next;
4296 /* FIXME: transfer remsets if any */
4298 RememberedSet *next = rset->next;
4299 free_internal_mem (rset);
4306 unregister_thread (void *k)
4309 unregister_current_thread ();
4314 mono_gc_register_thread (void *baseptr)
4316 SgenThreadInfo *info;
4318 info = thread_info_lookup (ARCH_GET_THREAD ());
4320 info = gc_register_current_thread (baseptr);
4322 return info != NULL;
4325 #if USE_PTHREAD_INTERCEPT
4327 #undef pthread_create
4329 #undef pthread_detach
4332 void *(*start_routine) (void *);
4336 } SgenThreadStartInfo;
4339 gc_start_thread (void *arg)
4341 SgenThreadStartInfo *start_info = arg;
4342 SgenThreadInfo* info;
4343 void *t_arg = start_info->arg;
4344 void *(*start_func) (void*) = start_info->start_routine;
4348 info = gc_register_current_thread (&result);
4350 sem_post (&(start_info->registered));
4351 result = start_func (t_arg);
4353 * this is done by the pthread key dtor
4355 unregister_current_thread ();
4363 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
4365 SgenThreadStartInfo *start_info;
4368 start_info = malloc (sizeof (SgenThreadStartInfo));
4371 sem_init (&(start_info->registered), 0, 0);
4372 start_info->arg = arg;
4373 start_info->start_routine = start_routine;
4375 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
4377 while (sem_wait (&(start_info->registered)) != 0) {
4378 /*if (EINTR != errno) ABORT("sem_wait failed"); */
4381 sem_destroy (&(start_info->registered));
4387 mono_gc_pthread_join (pthread_t thread, void **retval)
4389 return pthread_join (thread, retval);
4393 mono_gc_pthread_detach (pthread_t thread)
4395 return pthread_detach (thread);
4398 #endif /* USE_PTHREAD_INTERCEPT */
4401 * ######################################################################
4402 * ######## Write barriers
4403 * ######################################################################
4406 static RememberedSet*
4407 alloc_remset (int size, gpointer id) {
4408 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
4409 res->store_next = res->data;
4410 res->end_set = res->data + size;
4412 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
4417 * Note: the write barriers first do the needed GC work and then do the actual store:
4418 * this way the value is visible to the conservative GC scan after the write barrier
4419 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
4420 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
4421 * happens when we need to record which pointers contain references to the new generation.
4422 * The write barrier will be executed, but the pointer is still not stored.
4425 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
4428 if ((char*)field_ptr >= nursery_start && (char*)field_ptr < nursery_real_end) {
4429 *(void**)field_ptr = value;
4432 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
4433 rs = remembered_set;
4434 if (rs->store_next < rs->end_set) {
4435 *(rs->store_next++) = (mword)field_ptr;
4436 *(void**)field_ptr = value;
4439 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4440 rs->next = remembered_set;
4441 remembered_set = rs;
4442 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4443 *(rs->store_next++) = (mword)field_ptr;
4444 *(void**)field_ptr = value;
4448 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
4450 RememberedSet *rs = remembered_set;
4451 if ((char*)slot_ptr >= nursery_start && (char*)slot_ptr < nursery_real_end) {
4452 *(void**)slot_ptr = value;
4455 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
4456 if (rs->store_next < rs->end_set) {
4457 *(rs->store_next++) = (mword)slot_ptr;
4458 *(void**)slot_ptr = value;
4461 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4462 rs->next = remembered_set;
4463 remembered_set = rs;
4464 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4465 *(rs->store_next++) = (mword)slot_ptr;
4466 *(void**)slot_ptr = value;
4470 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
4472 RememberedSet *rs = remembered_set;
4473 if ((char*)slot_ptr >= nursery_start && (char*)slot_ptr < nursery_real_end)
4475 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
4476 if (rs->store_next + 1 < rs->end_set) {
4477 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4478 *(rs->store_next++) = count;
4481 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4482 rs->next = remembered_set;
4483 remembered_set = rs;
4484 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4485 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4486 *(rs->store_next++) = count;
4490 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4492 RememberedSet *rs = remembered_set;
4493 if ((char*)ptr >= nursery_start && (char*)ptr < nursery_real_end) {
4494 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4495 *(void**)ptr = value;
4498 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p (%s)\n", ptr, value ? safe_name (value) : "null"));
4499 /* FIXME: ensure it is on the heap */
4500 if (rs->store_next < rs->end_set) {
4501 *(rs->store_next++) = (mword)ptr;
4502 *(void**)ptr = value;
4505 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4506 rs->next = remembered_set;
4507 remembered_set = rs;
4508 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4509 *(rs->store_next++) = (mword)ptr;
4510 *(void**)ptr = value;
4514 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
4516 RememberedSet *rs = remembered_set;
4517 if ((char*)dest >= nursery_start && (char*)dest < nursery_real_end) {
4520 DEBUG (8, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
4522 if (rs->store_next + 1 < rs->end_set) {
4523 *(rs->store_next++) = (mword)dest | REMSET_VTYPE;
4524 *(rs->store_next++) = (mword)klass->gc_descr;
4527 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4528 rs->next = remembered_set;
4529 remembered_set = rs;
4530 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4531 *(rs->store_next++) = (mword)dest | REMSET_VTYPE;
4532 *(rs->store_next++) = (mword)klass->gc_descr;
4536 * mono_gc_wbarrier_object:
4538 * Write barrier to call when obj is the result of a clone or copy of an object.
4541 mono_gc_wbarrier_object (MonoObject* obj)
4543 RememberedSet *rs = remembered_set;
4544 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4545 if (rs->store_next < rs->end_set) {
4546 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4549 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4550 rs->next = remembered_set;
4551 remembered_set = rs;
4552 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4553 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4557 * ######################################################################
4558 * ######## Collector debugging
4559 * ######################################################################
4562 const char*descriptor_types [] = {
4574 describe_ptr (char *ptr)
4576 GCMemSection *section;
4581 if ((ptr >= nursery_start) && (ptr < nursery_real_end)) {
4582 printf ("Pointer inside nursery.\n");
4584 for (section = section_list; section;) {
4585 if (ptr >= section->data && ptr < section->data + section->size)
4587 section = section->next;
4591 printf ("Pointer inside oldspace.\n");
4592 } else if (obj_is_from_pinned_alloc (ptr)) {
4593 printf ("Pointer is inside a pinned chunk.\n");
4595 printf ("Pointer unknown.\n");
4600 if (object_is_pinned (ptr))
4601 printf ("Object is pinned.\n");
4603 // FIXME: Handle pointers to the inside of objects
4604 vtable = (MonoVTable*)LOAD_VTABLE (ptr);
4606 printf ("VTable: %p\n", vtable);
4607 if (vtable == NULL) {
4608 printf ("VTable is invalid (empty).\n");
4611 if (((char*)vtable >= nursery_start) && ((char*)vtable < nursery_real_end)) {
4612 printf ("VTable is invalid (points inside nursery).\n");
4615 printf ("Class: %s\n", vtable->klass->name);
4617 desc = ((GCVTable*)vtable)->desc;
4618 printf ("Descriptor: %lx\n", desc);
4621 printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]);
4625 find_in_remset_loc (mword *p, char *addr, gboolean *found)
4631 switch ((*p) & REMSET_TYPE_MASK) {
4632 case REMSET_LOCATION:
4633 if (*p == (mword)addr)
4637 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4639 if ((void**)addr >= ptr && (void**)addr < ptr + count)
4643 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4644 count = safe_object_get_size ((MonoObject*)ptr);
4645 count += (ALLOC_ALIGN - 1);
4646 count &= (ALLOC_ALIGN - 1);
4647 count /= sizeof (mword);
4648 if ((void**)addr >= ptr && (void**)addr < ptr + count)
4652 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4655 switch (desc & 0x7) {
4656 case DESC_TYPE_RUN_LENGTH:
4657 OBJ_RUN_LEN_SIZE (skip_size, desc, ptr);
4658 /* The descriptor includes the size of MonoObject */
4659 skip_size -= sizeof (MonoObject);
4660 if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer)))
4665 g_assert_not_reached ();
4670 g_assert_not_reached ();
4676 * Return whenever ADDR occurs in the remembered sets
4679 find_in_remsets (char *addr)
4682 SgenThreadInfo *info;
4683 RememberedSet *remset;
4685 gboolean found = FALSE;
4687 /* the global one */
4688 for (remset = global_remset; remset; remset = remset->next) {
4689 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4690 for (p = remset->data; p < remset->store_next;) {
4691 p = find_in_remset_loc (p, addr, &found);
4696 /* the per-thread ones */
4697 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4698 for (info = thread_table [i]; info; info = info->next) {
4699 for (remset = info->remset; remset; remset = remset->next) {
4700 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4701 for (p = remset->data; p < remset->store_next;) {
4702 p = find_in_remset_loc (p, addr, &found);
4714 #define HANDLE_PTR(ptr,obj) do { \
4715 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
4716 if (!find_in_remsets ((char*)(ptr))) { \
4717 fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
4718 g_assert_not_reached (); \
4724 * Check that each object reference inside the area which points into the nursery
4725 * can be found in the remembered sets.
4727 static void __attribute__((noinline))
4728 check_remsets_for_area (char *start, char *end)
4733 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
4735 new_obj_references = 0;
4736 obj_references_checked = 0;
4737 while (start < end) {
4738 if (!*(void**)start) {
4739 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
4742 vt = (GCVTable*)LOAD_VTABLE (start);
4743 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
4745 MonoObject *obj = (MonoObject*)start;
4746 g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
4750 if (type == DESC_TYPE_STRING) {
4751 STRING_SIZE (skip_size, start);
4755 } else if (type == DESC_TYPE_RUN_LENGTH) {
4756 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
4757 g_assert (skip_size);
4758 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
4762 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
4763 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
4764 skip_size *= mono_array_length ((MonoArray*)start);
4765 skip_size += sizeof (MonoArray);
4766 skip_size += (ALLOC_ALIGN - 1);
4767 skip_size &= ~(ALLOC_ALIGN - 1);
4768 OBJ_VECTOR_FOREACH_PTR (vt, start);
4769 if (((MonoArray*)start)->bounds) {
4770 /* account for the bounds */
4771 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
4776 } else if (type == DESC_TYPE_SMALL_BITMAP) {
4777 OBJ_BITMAP_SIZE (skip_size, desc, start);
4778 g_assert (skip_size);
4779 OBJ_BITMAP_FOREACH_PTR (desc,start);
4783 } else if (type == DESC_TYPE_LARGE_BITMAP) {
4784 skip_size = safe_object_get_size ((MonoObject*)start);
4785 skip_size += (ALLOC_ALIGN - 1);
4786 skip_size &= ~(ALLOC_ALIGN - 1);
4787 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
4791 } else if (type == DESC_TYPE_COMPLEX) {
4792 /* this is a complex object */
4793 skip_size = safe_object_get_size ((MonoObject*)start);
4794 skip_size += (ALLOC_ALIGN - 1);
4795 skip_size &= ~(ALLOC_ALIGN - 1);
4796 OBJ_COMPLEX_FOREACH_PTR (vt, start);
4800 } else if (type == DESC_TYPE_COMPLEX_ARR) {
4801 /* this is an array of complex structs */
4802 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
4803 skip_size *= mono_array_length ((MonoArray*)start);
4804 skip_size += sizeof (MonoArray);
4805 skip_size += (ALLOC_ALIGN - 1);
4806 skip_size &= ~(ALLOC_ALIGN - 1);
4807 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
4808 if (((MonoArray*)start)->bounds) {
4809 /* account for the bounds */
4810 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
4822 * Perform consistency check of the heap.
4824 * Assumes the world is stopped.
4827 check_consistency (void)
4829 GCMemSection *section;
4831 // Need to add more checks
4832 // FIXME: Create a general heap enumeration function and use that
4834 DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
4836 // Check that oldspace->newspace pointers are registered with the collector
4837 for (section = section_list; section; section = section->next) {
4838 if (section->role == MEMORY_ROLE_GEN0)
4840 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
4841 check_remsets_for_area (section->data, section->next_data);
4844 DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
4847 /* Check that the reference is valid */
4849 #define HANDLE_PTR(ptr,obj) do { \
4851 g_assert (safe_name (*(ptr)) != NULL); \
4858 * Perform consistency check on an object. Currently we only check that the
4859 * reference fields are valid.
4862 check_object (char *start)
4871 vt = (GCVTable*)LOAD_VTABLE (start);
4872 //type = vt->desc & 0x7;
4875 switch (desc & 0x7) {
4876 case DESC_TYPE_STRING:
4877 STRING_SIZE (skip_size, start);
4878 return start + skip_size;
4879 case DESC_TYPE_RUN_LENGTH:
4880 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
4881 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
4882 g_assert (skip_size);
4883 return start + skip_size;
4884 case DESC_TYPE_ARRAY:
4885 case DESC_TYPE_VECTOR:
4886 OBJ_VECTOR_FOREACH_PTR (vt, start);
4887 skip_size = safe_object_get_size ((MonoObject*)start);
4888 skip_size += (ALLOC_ALIGN - 1);
4889 skip_size &= ~(ALLOC_ALIGN - 1);
4890 return start + skip_size;
4891 case DESC_TYPE_SMALL_BITMAP:
4892 OBJ_BITMAP_FOREACH_PTR (desc,start);
4893 OBJ_BITMAP_SIZE (skip_size, desc, start);
4894 return start + skip_size;
4895 case DESC_TYPE_LARGE_BITMAP:
4896 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
4897 skip_size = safe_object_get_size ((MonoObject*)start);
4898 skip_size += (ALLOC_ALIGN - 1);
4899 skip_size &= ~(ALLOC_ALIGN - 1);
4900 return start + skip_size;
4901 case DESC_TYPE_COMPLEX:
4902 OBJ_COMPLEX_FOREACH_PTR (vt, start);
4903 /* this is a complex object */
4904 skip_size = safe_object_get_size ((MonoObject*)start);
4905 skip_size += (ALLOC_ALIGN - 1);
4906 skip_size &= ~(ALLOC_ALIGN - 1);
4907 return start + skip_size;
4908 case DESC_TYPE_COMPLEX_ARR:
4909 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
4910 /* this is an array of complex structs */
4911 skip_size = safe_object_get_size ((MonoObject*)start);
4912 skip_size += (ALLOC_ALIGN - 1);
4913 skip_size &= ~(ALLOC_ALIGN - 1);
4914 return start + skip_size;
4916 g_assert_not_reached ();
4921 * ######################################################################
4922 * ######## Other mono public interface functions.
4923 * ######################################################################
4927 mono_gc_collect (int generation)
4930 update_current_thread_stack (&generation);
4932 if (generation == 0) {
4933 collect_nursery (0);
4935 major_collection ();
4942 mono_gc_max_generation (void)
4948 mono_gc_collection_count (int generation)
4950 if (generation == 0)
4951 return num_minor_gcs;
4952 return num_major_gcs;
4956 mono_gc_get_used_size (void)
4959 GCMemSection *section;
4961 tot = los_memory_usage;
4962 for (section = section_list; section; section = section->next) {
4963 /* this is approximate... */
4964 tot += section->next_data - section->data;
4966 /* FIXME: account for pinned objects */
4972 mono_gc_get_heap_size (void)
4978 mono_gc_disable (void)
4986 mono_gc_enable (void)
4994 mono_object_is_alive (MonoObject* o)
5000 mono_gc_get_generation (MonoObject *obj)
5002 if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end)
5008 mono_gc_enable_events (void)
5013 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
5015 mono_gc_register_disappearing_link (obj, link_addr);
5020 mono_gc_weak_link_remove (void **link_addr)
5022 mono_gc_register_disappearing_link (NULL, link_addr);
5027 mono_gc_weak_link_get (void **link_addr)
5033 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
5035 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
5036 mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT);
5039 /* conservative scanning */
5040 DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits));
5045 mono_gc_alloc_fixed (size_t size, void *descr)
5047 /* FIXME: do a single allocation */
5048 void *res = calloc (1, size);
5051 if (!mono_gc_register_root (res, size, descr)) {
5059 mono_gc_free_fixed (void* addr)
5061 mono_gc_deregister_root (addr);
5066 mono_gc_is_gc_thread (void)
5070 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
5076 mono_gc_base_init (void)
5080 struct sigaction sinfo;
5082 LOCK_INIT (gc_mutex);
5084 if (gc_initialized) {
5088 pagesize = mono_pagesize ();
5089 gc_debug_file = stderr;
5090 if ((env = getenv ("MONO_GC_DEBUG"))) {
5091 opts = g_strsplit (env, ",", -1);
5092 for (ptr = opts; ptr && *ptr; ptr ++) {
5094 if (opt [0] >= '0' && opt [0] <= '9') {
5095 gc_debug_level = atoi (opt);
5100 char *rf = g_strdup_printf ("%s.%d", opt, getpid ());
5101 gc_debug_file = fopen (rf, "wb");
5103 gc_debug_file = stderr;
5106 } else if (!strcmp (opt, "collect-before-allocs")) {
5107 collect_before_allocs = TRUE;
5108 } else if (!strcmp (opt, "check-at-minor-collections")) {
5109 consistency_check_at_minor_collection = TRUE;
5111 fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env);
5112 fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|<option>]+ where l is a debug level 0-9.\n");
5113 fprintf (stderr, "Valid options are: collect-before-allocs, check-at-minor-collections.\n");
5120 sem_init (&suspend_ack_semaphore, 0, 0);
5122 sigfillset (&sinfo.sa_mask);
5123 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
5124 sinfo.sa_handler = suspend_handler;
5125 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
5126 g_error ("failed sigaction");
5129 sinfo.sa_handler = restart_handler;
5130 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
5131 g_error ("failed sigaction");
5134 sigfillset (&suspend_signal_mask);
5135 sigdelset (&suspend_signal_mask, restart_signal_num);
5137 global_remset = alloc_remset (1024, NULL);
5138 global_remset->next = NULL;
5140 pthread_key_create (&remembered_set_key, unregister_thread);
5141 gc_initialized = TRUE;
5143 mono_gc_register_thread (&sinfo);
5151 /* FIXME: Do this in the JIT, where specialized allocation sequences can be created
5152 * for each class. This is currently not easy to do, as it is hard to generate basic
5153 * blocks + branches, but it is easy with the linear IL codebase.
5156 create_allocator (int atype)
5158 int tlab_next_addr_offset = -1;
5159 int tlab_temp_end_offset = -1;
5160 int p_var, size_var, tlab_next_addr_var, new_next_var;
5161 guint32 slowpath_branch;
5162 MonoMethodBuilder *mb;
5164 MonoMethodSignature *csig;
5165 static gboolean registered = FALSE;
5167 MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
5168 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5170 g_assert (tlab_next_addr_offset != -1);
5171 g_assert (tlab_temp_end_offset != -1);
5173 g_assert (atype == ATYPE_NORMAL);
5176 mono_register_jit_icall (mono_gc_alloc_obj, "mono_gc_alloc_obj", mono_create_icall_signature ("object ptr int"), FALSE);
5180 csig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
5181 csig->ret = &mono_defaults.object_class->byval_arg;
5182 csig->params [0] = &mono_defaults.int_class->byval_arg;
5184 mb = mono_mb_new (mono_defaults.object_class, "Alloc", MONO_WRAPPER_ALLOC);
5185 size_var = mono_mb_add_local (mb, &mono_defaults.int32_class->byval_arg);
5186 /* size = vtable->klass->instance_size; */
5187 mono_mb_emit_ldarg (mb, 0);
5188 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoVTable, klass));
5189 mono_mb_emit_byte (mb, CEE_ADD);
5190 mono_mb_emit_byte (mb, CEE_LDIND_I);
5191 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoClass, instance_size));
5192 mono_mb_emit_byte (mb, CEE_ADD);
5193 /* FIXME: assert instance_size stays a 4 byte integer */
5194 mono_mb_emit_byte (mb, CEE_LDIND_U4);
5195 mono_mb_emit_stloc (mb, size_var);
5197 /* size += ALLOC_ALIGN - 1; */
5198 mono_mb_emit_ldloc (mb, size_var);
5199 mono_mb_emit_icon (mb, ALLOC_ALIGN - 1);
5200 mono_mb_emit_byte (mb, CEE_ADD);
5201 /* size &= ~(ALLOC_ALIGN - 1); */
5202 mono_mb_emit_icon (mb, ~(ALLOC_ALIGN - 1));
5203 mono_mb_emit_byte (mb, CEE_AND);
5204 mono_mb_emit_stloc (mb, size_var);
5207 * We need to modify tlab_next, but the JIT only supports reading, so we read
5208 * another tls var holding its address instead.
5211 /* tlab_next_addr (local) = tlab_next_addr (TLS var) */
5212 tlab_next_addr_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5213 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5214 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5215 mono_mb_emit_i4 (mb, tlab_next_addr_offset);
5216 mono_mb_emit_stloc (mb, tlab_next_addr_var);
5218 /* p = (void**)tlab_next; */
5219 p_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5220 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5221 mono_mb_emit_byte (mb, CEE_LDIND_I);
5222 mono_mb_emit_stloc (mb, p_var);
5224 /* new_next = (char*)p + size; */
5225 new_next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5226 mono_mb_emit_ldloc (mb, p_var);
5227 mono_mb_emit_ldloc (mb, size_var);
5228 mono_mb_emit_byte (mb, CEE_CONV_I);
5229 mono_mb_emit_byte (mb, CEE_ADD);
5230 mono_mb_emit_stloc (mb, new_next_var);
5232 /* tlab_next = new_next */
5233 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5234 mono_mb_emit_ldloc (mb, new_next_var);
5235 mono_mb_emit_byte (mb, CEE_STIND_I);
5237 /* if (G_LIKELY (new_next < tlab_temp_end)) */
5238 mono_mb_emit_ldloc (mb, new_next_var);
5239 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5240 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5241 mono_mb_emit_i4 (mb, tlab_temp_end_offset);
5242 slowpath_branch = mono_mb_emit_short_branch (mb, MONO_CEE_BLT_UN_S);
5246 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5247 mono_mb_emit_byte (mb, CEE_MONO_NOT_TAKEN);
5249 /* FIXME: mono_gc_alloc_obj takes a 'size_t' as an argument, not an int32 */
5250 mono_mb_emit_ldarg (mb, 0);
5251 mono_mb_emit_ldloc (mb, size_var);
5252 mono_mb_emit_icall (mb, mono_gc_alloc_obj);
5253 mono_mb_emit_byte (mb, CEE_RET);
5256 mono_mb_patch_short_branch (mb, slowpath_branch);
5258 /* FIXME: Memory barrier */
5261 mono_mb_emit_ldloc (mb, p_var);
5262 mono_mb_emit_ldarg (mb, 0);
5263 mono_mb_emit_byte (mb, CEE_STIND_I);
5266 mono_mb_emit_ldloc (mb, p_var);
5267 mono_mb_emit_byte (mb, CEE_RET);
5269 res = mono_mb_create_method (mb, csig, 8);
5271 mono_method_get_header (res)->init_locals = FALSE;
5275 static MonoMethod* alloc_method_cache [ATYPE_NUM];
5278 * Generate an allocator method implementing the fast path of mono_gc_alloc_obj ().
5279 * The signature of the called method is:
5280 * object allocate (MonoVTable *vtable)
5283 mono_gc_get_managed_allocator (MonoVTable *vtable, gboolean for_box)
5285 int tlab_next_offset = -1;
5286 int tlab_temp_end_offset = -1;
5287 MonoClass *klass = vtable->klass;
5288 MONO_THREAD_VAR_OFFSET (tlab_next, tlab_next_offset);
5289 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5291 if (tlab_next_offset == -1 || tlab_temp_end_offset == -1)
5293 if (klass->instance_size > tlab_size)
5295 if (klass->has_finalize || klass->marshalbyref || (mono_profiler_get_events () & MONO_PROFILE_ALLOCATIONS))
5299 if (klass->byval_arg.type == MONO_TYPE_STRING)
5301 if (collect_before_allocs)
5304 return mono_gc_get_managed_allocator_by_type (0);
5308 mono_gc_get_managed_allocator_type (MonoMethod *managed_alloc)
5314 mono_gc_get_managed_allocator_by_type (int atype)
5318 mono_loader_lock ();
5319 res = alloc_method_cache [atype];
5321 res = alloc_method_cache [atype] = create_allocator (atype);
5322 mono_loader_unlock ();
5327 mono_gc_get_managed_allocator_types (void)
5332 #endif /* HAVE_SGEN_GC */