2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright (C) 2005-2006 Novell, Inc
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation always provides zeroed memory; having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list is part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "metadata/method-builder.h"
148 #include "metadata/profiler-private.h"
149 #include "utils/mono-mmap.h"
151 #ifdef HAVE_VALGRIND_MEMCHECK_H
152 #include <valgrind/memcheck.h>
155 #define OPDEF(a,b,c,d,e,f,g,h,i,j) \
159 #include "mono/cil/opcode.def"
166 * ######################################################################
167 * ######## Types and constants used by the GC.
168 * ######################################################################
170 #if SIZEOF_VOID_P == 4
171 typedef guint32 mword;
173 typedef guint64 mword;
176 static int gc_initialized = 0;
177 static int gc_debug_level = 0;
178 static FILE* gc_debug_file;
179 /* If set, do a minor collection before every allocation */
180 static gboolean collect_before_allocs = FALSE;
181 /* If set, do a heap consistency check before each minor collection */
182 static gboolean consistency_check_at_minor_collection = FALSE;
/* Flushes any buffered GC debug output by calling fflush on gc_debug_file. */
185 mono_gc_flush_info (void)
187 fflush (gc_debug_file);
/* Debug statements with a level above MAX_DEBUG_LEVEL never run, whatever gc_debug_level is. */
190 #define MAX_DEBUG_LEVEL 9
/*
 * Executes statement `a` only when `level` is at most MAX_DEBUG_LEVEL and at
 * most the runtime gc_debug_level. The condition is wrapped in G_UNLIKELY, and
 * the do/while (0) wrapper lets the macro be used like a single statement.
 */
191 #define DEBUG(level,a) do {if (G_UNLIKELY ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level)) a;} while (0)
/* Timing helpers built on gettimeofday () and struct timeval. */
/* declares a struct timeval variable called name */
#define TV_DECLARE(name) struct timeval name
/* stores the current time of day into tv */
#define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
/*
 * Microseconds elapsed between start and end, converted to int.
 * Every use of the arguments is parenthesized so that expressions such as
 * *ptr or array [i] can be passed safely.
 */
#define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + (end).tv_usec - (start).tv_usec)
197 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
207 /* each request from the OS ends up in a GCMemSection */
208 typedef struct _GCMemSection GCMemSection;
209 struct _GCMemSection {
213 /* pointer where more data could be allocated if it fits */
217 * scan starts is an array of pointers to objects equally spaced in the allocation area
218 * They let us quickly find pinned objects from pinning pointers.
221 /* in major collections indexes in the pin_queue for objects that pin this section */
224 unsigned short num_scan_start;
228 /* large object space struct: 64+ KB */
229 /* we could make this limit much smaller to avoid memcpy copy
230 * and potentially have more room in the GC descriptor: need to measure
231 * This also means that such small OS objects will need to be
232 * allocated in a different way (using pinned chunks).
233 * We may want to put large but smaller than 64k objects in the fixed space
234 * when we move the object from one generation to another (to limit the
235 * pig in the snake effect).
236 * Note: it may be worth to have an optimized copy function, since we can
237 * assume that objects are aligned and have a multiple of 8 size.
238 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
239 * true if MONO_ZERO_LEN_ARRAY is nonzero.
241 typedef struct _LOSObject LOSObject;
244 mword size; /* this is the object size */
245 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
248 char data [MONO_ZERO_LEN_ARRAY];
251 /* Pinned objects are allocated in the LOS space if bigger than half a page
252 * or from freelists otherwise. We assume that pinned objects are relatively few
253 * and they have a slow dying speed (like interned strings, thread objects).
254 * As such they will be collected only at major collections.
255 * free lists are not global: when we need memory we allocate a PinnedChunk.
256 * Each pinned chunk is made of several pages, the first of which is used
257 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
258 * includes the freelists vectors and info about the object size of each page
259 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
260 * a size is assigned to it, the page is divided in the proper chunks and each
261 * chunk is added to the freelist. To not waste space, the remaining space in the
262 * first page is used as objects of size 16 or 32 (need to measure which are more
264 * We use this same structure to allocate memory used internally by the GC, so
265 * we never use malloc/free if we need to alloc during collection: the world is stopped
266 * and malloc/free will deadlock.
267 * When we want to iterate over pinned objects, we just scan a page at a time
268 * linearly according to the size of objects in the page: the next pointer used to link
269 * the items in the freelist uses the same word as the vtable. Since we keep freelists
270 * for each pinned chunk, if the word points outside the pinned chunk it means
272 * We could avoid this expensive scanning in creative ways. We could have a policy
273 * of putting in the pinned space only objects we know about that have no struct fields
274 * with references and we can easily use even an expensive write barrier for them,
275 * since pointer writes on such objects should be rare.
276 * The best compromise is to just alloc interned strings and System.MonoType in them.
277 * It would be nice to allocate MonoThread in it, too: must check that we properly
278 * use write barriers so we don't have to do any expensive scanning of the whole pinned
279 * chunk list during minor collections. We can avoid it now because we alloc in it only
280 * reference-free objects.
282 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
283 #define MAX_FREELIST_SIZE 2048
284 #define PINNED_PAGE_SIZE (4096)
285 #define PINNED_CHUNK_MIN_SIZE (4096*8)
286 typedef struct _PinnedChunk PinnedChunk;
287 struct _PinnedChunk {
290 int *page_sizes; /* a 0 means the page is still unused */
293 void *data [1]; /* page sizes and free lists are stored here */
296 /* The method used to clear the nursery */
297 /* Clearing at nursery collections is the safest, but has bad interactions with caches.
298 * Clearing at TLAB creation is much faster, but more complex and it might expose hard
303 CLEAR_AT_TLAB_CREATION
304 } NurseryClearPolicy;
306 static NurseryClearPolicy nursery_clear_policy = CLEAR_AT_TLAB_CREATION;
309 * The young generation is divided into fragments. This is because
310 * we can hand one fragment to a thread for lock-less fast alloc and
311 * because the young generation ends up fragmented anyway by pinned objects.
312 * Once a collection is done, a list of fragments is created. When doing
313 * thread local alloc we use smallish nurseries so we allow new threads to
314 * allocate memory from gen0 without triggering a collection. Threads that
315 * are found to allocate lots of memory are given bigger fragments. This
316 * should make the finalizer thread use little nursery memory after a while.
317 * We should start assigning threads very small fragments: if there are many
318 * threads the nursery will be full of reserved space that the threads may not
319 * use at all, slowing down allocation speed.
320 * Thread local allocation is done from areas of memory Hotspot calls Thread Local
321 * Allocation Buffers (TLABs).
323 typedef struct _Fragment Fragment;
327 char *fragment_start;
328 char *fragment_limit; /* the current soft limit for allocation */
332 /* the runtime can register areas of memory as roots: we keep two lists of roots,
333 * a pinned root set for conservatively scanned roots and a normal one for
334 * precisely scanned roots (currently implemented as a single list).
336 typedef struct _RootRecord RootRecord;
344 /* for use with write barriers */
345 typedef struct _RememberedSet RememberedSet;
346 struct _RememberedSet {
350 mword data [MONO_ZERO_LEN_ARRAY];
353 /* we have 4 possible values in the low 2 bits */
355 REMSET_LOCATION, /* just a pointer to the exact location */
356 REMSET_RANGE, /* range of pointer fields */
357 REMSET_OBJECT, /* mark all the object for scanning */
358 REMSET_VTYPE, /* a valuetype described by a gc descriptor */
359 REMSET_TYPE_MASK = 0x3
362 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
363 static pthread_key_t remembered_set_key;
364 static RememberedSet *global_remset;
365 static int store_to_global_remset = 0;
367 /* FIXME: later choose a size that takes into account the RememberedSet struct
368 * and doesn't waste any alloc padding space.
370 #define DEFAULT_REMSET_SIZE 1024
371 static RememberedSet* alloc_remset (int size, gpointer id);
373 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
374 * no cast from a pointer to an integer
381 /* these bits are set in the object vtable: we could merge them since an object can be
382 * either pinned or forwarded but not both.
383 * We store them in the vtable slot because the bits are used in the sync block for
384 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
385 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
386 * would be an invalid combination for the monitor and hash code).
387 * The values are already shifted.
388 * The forwarding address is stored in the sync block.
390 #define FORWARDED_BIT 1
392 #define VTABLE_BITS_MASK 0x3
394 /* returns NULL if not forwarded, or the forwarded address */
395 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
396 /* set the forwarded address fw_addr for object obj */
397 #define forward_object(obj,fw_addr) do { \
398 ((mword*)(obj))[0] |= FORWARDED_BIT; \
399 ((mword*)(obj))[1] = (mword)(fw_addr); \
402 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
403 #define pin_object(obj) do { \
404 ((mword*)(obj))[0] |= PINNED_BIT; \
406 #define unpin_object(obj) do { \
407 ((mword*)(obj))[0] &= ~PINNED_BIT; \
412 * Since we set bits in the vtable, use the macro to load it from the pointer to
413 * an object that is potentially pinned.
415 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
/*
 * Returns the class name (vt->klass->name) of the object at obj.
 * The vtable pointer is read through LOAD_VTABLE, so the low bits masked by
 * that macro are cleared before the pointer is dereferenced.
 */
418 safe_name (void* obj)
420 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
421 return vt->klass->name;
/*
 * Computes the size in bytes of object o from its class; the vtable is read
 * through LOAD_VTABLE.
 *  - strings: sizeof (MonoString) plus 2 bytes per character plus 2 more bytes
 *    (presumably room for a terminating character -- confirm);
 *  - arrays (klass->rank != 0): sizeof (MonoArray) plus element size times
 *    length, with sizeof (MonoArrayBounds) * rank added for the bounds data
 *    (NOTE(review): confirm under which condition the bounds are added);
 *  - any other object: klass->instance_size.
 */
425 safe_object_get_size (MonoObject* o)
427 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
428 if (klass == mono_defaults.string_class) {
429 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
430 } else if (klass->rank) {
431 MonoArray *array = (MonoArray*)o;
432 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
436 size += sizeof (MonoArrayBounds) * klass->rank;
440 /* from a created object: the class must be inited already */
441 return klass->instance_size;
/*
 * Tests whether o is a string whose length is 0 or an array (klass->rank != 0)
 * whose length is 0; the class is read through LOAD_VTABLE.
 * NOTE(review): going by the name, such an object may have its vtable set but
 * its length not yet stored -- confirm against the allocation paths.
 */
445 static inline gboolean
446 is_maybe_half_constructed (MonoObject *o)
450 klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
451 if ((klass == mono_defaults.string_class && mono_string_length ((MonoString*)o) == 0) ||
452 (klass->rank && mono_array_length ((MonoArray*)o) == 0))
459 * ######################################################################
460 * ######## Global data.
461 * ######################################################################
463 static LOCK_DECLARE (gc_mutex);
464 static int gc_disabled = 0;
465 static int num_minor_gcs = 0;
466 static int num_major_gcs = 0;
468 /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */
469 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
470 #define DEFAULT_NURSERY_SIZE (1024*512*2)
471 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
472 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
473 /* to quickly find the head of an object pinned by a conservative address
474 * we keep track of the objects allocated for each SCAN_START_SIZE memory
475 * chunk in the nursery or other memory sections. Larger values have less
476 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
478 #define SCAN_START_SIZE (4096*2)
479 /* the minimum size of a fragment that we consider useful for allocation */
480 #define FRAGMENT_MIN_SIZE (512)
481 /* This is a fixed value used for pinned chunks, not the system pagesize */
482 #define FREELIST_PAGESIZE 4096
484 static mword pagesize = 4096;
485 static mword nursery_size = DEFAULT_NURSERY_SIZE;
486 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
487 static mword max_section_size = DEFAULT_MAX_SECTION;
488 static int section_size_used = 0;
489 static int degraded_mode = 0;
491 static LOSObject *los_object_list = NULL;
492 static mword los_memory_usage = 0;
493 static mword los_num_objects = 0;
494 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
495 static mword total_alloc = 0;
496 /* use this to tune when to do a major/minor collection */
497 static mword memory_pressure = 0;
499 static GCMemSection *section_list = NULL;
500 static GCMemSection *nursery_section = NULL;
501 static mword lowest_heap_address = ~(mword)0;
502 static mword highest_heap_address = 0;
504 typedef struct _FinalizeEntry FinalizeEntry;
505 struct _FinalizeEntry {
508 void *data; /* can be a disappearing link or the data for the finalizer */
509 /* Note we could use just one pointer if we don't support multiple callbacks
510 * for finalizers and per-finalizer data and if we store the obj pointers
511 * in the link like libgc does
516 * The finalizable hash has the object as the key, the
517 * disappearing_link hash, has the link address as key.
519 static FinalizeEntry **finalizable_hash = NULL;
520 /* objects that are ready to be finalized */
521 static FinalizeEntry *fin_ready_list = NULL;
522 /* disappearing links use the same structure but a different list */
523 static FinalizeEntry **disappearing_link_hash = NULL;
524 static mword disappearing_link_hash_size = 0;
525 static mword finalizable_hash_size = 0;
527 static int num_registered_finalizers = 0;
528 static int num_ready_finalizers = 0;
529 static int num_disappearing_links = 0;
530 static int no_finalize = 0;
/*
 * Slot sizes, in bytes, of the free lists (smallest first).
 * Keep every entry a multiple of ALLOC_ALIGN.
 * On 64 bit systems the 8 byte slot is likely completely unused.
 */
static const int freelist_sizes [] = {
	8, 16, 24, 32,
	40, 48, 64, 80,
	96, 128, 160, 192,
	224, 256, 320, 384,
	448, 512, 584, 680,
	816, 1024, 1360, 2048
};
/* number of entries in freelist_sizes */
#define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (*freelist_sizes))
540 static char* max_pinned_chunk_addr = NULL;
541 static char* min_pinned_chunk_addr = (char*)-1;
542 /* pinned_chunk_list is used for allocations of objects that are never moved */
543 static PinnedChunk *pinned_chunk_list = NULL;
544 /* internal_chunk_list is used for allocating structures needed by the GC */
545 static PinnedChunk *internal_chunk_list = NULL;
/*
 * Walks pinned_chunk_list looking for a chunk whose address range contains p.
 * The range tested starts at chunk->start_data and ends (exclusive)
 * num_pages * FREELIST_PAGESIZE bytes past the start of the chunk itself.
 */
548 obj_is_from_pinned_alloc (char *p)
550 PinnedChunk *chunk = pinned_chunk_list;
551 for (; chunk; chunk = chunk->next) {
552 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
558 /* registered roots: the key to the hash is the root start address */
559 static RootRecord **roots_hash = NULL;
560 static int roots_hash_size = 0;
561 static mword roots_size = 0; /* amount of memory in the root set */
562 static int num_roots_entries = 0;
565 * The current allocation cursors
566 * We allocate objects in the nursery.
567 * The nursery is the area between nursery_start and nursery_real_end.
568 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
569 * from nursery fragments.
570 * tlab_next is the pointer to the space inside the TLAB where the next object will
572 * tlab_temp_end is the pointer to the end of the temporary space reserved for
573 * the allocation: it allows us to set the scan starts at reasonable intervals.
574 * tlab_real_end points to the end of the TLAB.
575 * nursery_frag_real_end points to the end of the currently used nursery fragment.
576 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
577 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
578 * At the next allocation, the area of the nursery where objects can be present is
579 * between MIN(nursery_first_pinned_start, first_fragment_start) and
580 * MAX(nursery_last_pinned_end, nursery_frag_real_end)
582 static char *nursery_start = NULL;
585 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
586 * variables for next+temp_end ?
588 static __thread char *tlab_start;
589 static __thread char *tlab_next;
590 static __thread char *tlab_temp_end;
591 static __thread char *tlab_real_end;
592 /* Used by the managed allocator */
593 static __thread char **tlab_next_addr;
594 static char *nursery_next = NULL;
595 static char *nursery_frag_real_end = NULL;
596 static char *nursery_real_end = NULL;
597 static char *nursery_first_pinned_start = NULL;
598 static char *nursery_last_pinned_end = NULL;
600 /* The size of a TLAB */
601 /* The bigger the value, the less often we have to go to the slow path to allocate a new
602 * one, but the more space is wasted by threads not allocating much memory.
604 * FIXME: Make this self-tuning for each thread.
606 static guint32 tlab_size = (1024 * 4);
608 /* fragments that are free and ready to be used for allocation */
609 static Fragment *nursery_fragments = NULL;
610 /* freelist of fragment structures */
611 static Fragment *fragment_freelist = NULL;
614 * used when moving the objects
615 * When the nursery is collected, objects are copied to to_space.
616 * The area between to_space and gray_objects is used as a stack
617 * of objects that need their fields checked for more references
619 * We should optimize somehow this mechanism to avoid rescanning
620 * ptr-free objects. The order is also probably not optimal: need to
621 * test cache misses and other graph traversal orders.
623 static char *to_space = NULL;
624 static char *gray_objects = NULL;
625 static char *to_space_end = NULL;
626 static GCMemSection *to_space_section = NULL;
628 /* objects bigger than this go into the large object space */
629 #define MAX_SMALL_OBJ_SIZE 0xffff
632 * ######################################################################
633 * ######## Macros and function declarations.
634 * ######################################################################
/*
 * Widens the range tracked by lowest_heap_address and highest_heap_address so
 * that it also covers low and high. Both arguments are cast to mword before
 * they are compared and stored.
 */
637 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
638 if ((mword)(low) < lowest_heap_address) \
639 lowest_heap_address = (mword)(low); \
640 if ((mword)(high) > highest_heap_address) \
641 highest_heap_address = (mword)(high); \
/*
 * Computes ptr rounded up to the next multiple of sizeof (gpointer);
 * a value that is already a multiple is left unchanged.
 */
645 align_pointer (void *ptr)
647 mword p = (mword)ptr;
/* add (alignment - 1), then clear the low bits */
648 p += sizeof (gpointer) - 1;
649 p &= ~ (sizeof (gpointer) - 1);
653 /* forward declarations */
654 static void* get_internal_mem (size_t size);
655 static void free_internal_mem (void *addr);
656 static void* get_os_memory (size_t size, int activate);
657 static void free_os_memory (void *addr, size_t size);
658 static void report_internal_mem_usage (void);
660 static int stop_world (void);
661 static int restart_world (void);
662 static void pin_thread_data (void *start_nursery, void *end_nursery);
663 static void scan_from_remsets (void *start_nursery, void *end_nursery);
664 static void find_pinning_ref_from_thread (char *obj, size_t size);
665 static void update_current_thread_stack (void *start);
666 static GCMemSection* alloc_section (size_t size);
667 static void finalize_in_range (char *start, char *end);
668 static void null_link_in_range (char *start, char *end);
669 static gboolean search_fragment_for_size (size_t size);
670 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
671 static void clear_remsets (void);
672 static void clear_tlabs (void);
673 static char *find_tlab_next_from_address (char *addr);
674 static void sweep_pinned_objects (void);
675 static void free_large_object (LOSObject *obj);
676 static void free_mem_section (GCMemSection *section);
678 void check_consistency (void);
681 * ######################################################################
682 * ######## GC descriptors
683 * ######################################################################
684 * Used to quickly get the info the GC needs about an object: size and
685 * where the references are held.
687 /* objects are aligned to 8 bytes boundaries
688 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
689 * The low 3 bits define the type of the descriptor. The other bits
690 * depend on the type.
691 * As a general rule the 13 remaining low bits define the size, either
692 * of the whole object or of the elements in the arrays. While for objects
693 * the size is already in bytes, for arrays we need to shift, because
694 * array elements might be smaller than 8 bytes. In case of arrays, we
695 * use two bits to describe what the additional high bits represent,
696 * so the default behaviour can handle element sizes less than 2048 bytes.
697 * If the high 16 bits are 0, the object is pointer-free.
698 * This design should make it easy and fast to skip over ptr-free data.
699 * The first 4 types should cover >95% of the objects.
700 * Note that since the size of objects is limited to 64K, larger objects
701 * will be allocated in the large object heap.
702 * If we want 4-bytes alignment, we need to put vector and small bitmap
706 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
707 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
708 DESC_TYPE_STRING, /* nothing */
709 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
710 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
711 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
712 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
713 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
714 /* subtypes for arrays and vectors */
715 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
716 DESC_TYPE_V_REFS, /* all the array elements are refs */
717 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
718 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
721 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
722 #define LOW_TYPE_BITS 3
723 #define SMALL_BITMAP_SHIFT 16
724 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
725 #define VECTOR_INFO_SHIFT 14
726 #define VECTOR_ELSIZE_SHIFT 3
727 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
728 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
729 #define SMALL_SIZE_MASK 0xfff8
730 #define MAX_ELEMENT_SIZE 0x3ff
731 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
732 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
733 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
734 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
735 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
737 #define ALLOC_ALIGN 8
740 /* Root bitmap descriptors are simpler: the lower two bits describe the type
741 * and we either have 30/62 bitmap bits or nibble-based run-length,
742 * or a complex descriptor
745 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
748 ROOT_DESC_LARGE_BITMAP,
749 ROOT_DESC_TYPE_MASK = 0x3,
750 ROOT_DESC_TYPE_SHIFT = 2,
753 static gsize* complex_descriptors = NULL;
754 static int complex_descriptors_size = 0;
755 static int complex_descriptors_next = 0;
/*
 * Stores a bitmap in the complex_descriptors table; res is the index of the
 * entry inside that table.
 * Table layout: each entry is one length word (nwords, which counts itself)
 * followed by nwords - 1 bitmap words; the search loop hops from entry to
 * entry by adding the length word to the index.
 * An entry with the same length and the same bitmap words is searched for
 * first, so identical bitmaps are not stored twice.
 * When the table is too small it grows to twice its size plus nwords through
 * g_realloc.
 *   bitmap:  the bitmap words to store
 *   numbits: number of bits described by bitmap
 * NOTE(review): nwords - 1 equals numbits/GC_BITS_PER_WORD + 1 words, which are
 * all read from bitmap; when numbits is an exact multiple of GC_BITS_PER_WORD
 * that is one word more than numbits bits occupy -- confirm that callers always
 * pass a bitmap at least that large.
 */
758 alloc_complex_descriptor (gsize *bitmap, int numbits)
760 int nwords = numbits/GC_BITS_PER_WORD + 2;
765 res = complex_descriptors_next;
766 /* linear search, so we don't have duplicates with domain load/unload
767 * this should not be performance critical or we'd have bigger issues
768 * (the number and size of complex descriptors should be small).
770 for (i = 0; i < complex_descriptors_next; ) {
771 if (complex_descriptors [i] == nwords) {
773 for (j = 0; j < nwords - 1; ++j) {
774 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
784 i += complex_descriptors [i];
786 if (complex_descriptors_next + nwords > complex_descriptors_size) {
787 int new_size = complex_descriptors_size * 2 + nwords;
788 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
789 complex_descriptors_size = new_size;
791 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
792 complex_descriptors_next += nwords;
793 complex_descriptors [res] = nwords;
794 for (i = 0; i < nwords - 1; ++i) {
795 complex_descriptors [res + 1 + i] = bitmap [i];
796 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
803 * Descriptor builders.
/*
 * Descriptor for strings: both arguments are ignored and the descriptor is
 * just the DESC_TYPE_STRING tag cast to a pointer.
 */
806 mono_gc_make_descr_for_string (gsize *bitmap, int numbits)
808 return (void*) DESC_TYPE_STRING;
/*
 * Builds the GC descriptor for an object from the bitmap of its reference
 * slots.
 *   bitmap, numbits: bits 0 .. numbits - 1 are scanned for set bits (presumably
 *                    one bit per pointer-sized word of the object -- confirm)
 *   obj_size:        object size in bytes, rounded up to ALLOC_ALIGN before
 *                    it is stored in the descriptor
 * Encodings built here: DESC_TYPE_RUN_LENGTH (with no run when no bit is set,
 * or one run when the set bits are contiguous and both first_set and num_set
 * are below 256), DESC_TYPE_SMALL_BITMAP, DESC_TYPE_LARGE_BITMAP and finally
 * DESC_TYPE_COMPLEX, whose payload comes from alloc_complex_descriptor. The
 * run-length and small-bitmap forms appear to be used only when the rounded
 * size is at most MAX_SMALL_OBJ_SIZE. For the bitmap forms the bitmap is
 * shifted right by OBJECT_HEADER_WORDS because the header holds no references.
 * NOTE(review): in the run-length encoding first_set and num_set are ints;
 * num_set is only checked to be below 256, so num_set << 24 overflows a 32 bit
 * int once num_set reaches 128.
 */
812 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
814 int first_set = -1, num_set = 0, last_set = -1, i;
816 size_t stored_size = obj_size;
817 stored_size += ALLOC_ALIGN - 1;
818 stored_size &= ~(ALLOC_ALIGN - 1);
/*
 * NOTE(review): the literal 1 below is an int while the bitmap words are gsize;
 * when GC_BITS_PER_WORD is 64 the shift count can reach 63, which is undefined
 * for a 32 bit int. A (gsize)1 operand would give a mask as wide as the word.
 */
819 for (i = 0; i < numbits; ++i) {
820 if (bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
827 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
828 /* check run-length encoding first: one byte offset, one byte number of pointers
829 * on 64 bit archs, we can have 3 runs, just one on 32.
830 * It may be better to use nibbles.
833 desc = DESC_TYPE_RUN_LENGTH | stored_size;
834 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %zd\n", (void*)desc, stored_size));
836 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
837 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
838 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %zd, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
841 /* we know the 2-word header is ptr-free */
842 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
843 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
844 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
848 /* we know the 2-word header is ptr-free */
849 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
850 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
851 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %zd, last set: %d\n", (void*)desc, stored_size, last_set));
854 /* it's a complex object ... */
855 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
859 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
/*
 * Build the GC descriptor for an array type.
 * @vector: non-zero selects DESC_TYPE_VECTOR, zero DESC_TYPE_ARRAY
 * @elem_bitmap: reference bitmap of one element
 * @numbits: number of bits of @elem_bitmap to examine
 * @elem_size: size of one element in bytes
 *
 * When @elem_size is at most MAX_ELEMENT_SIZE it is stored in the descriptor
 * and one of the vector subtypes is chosen: pointer-free, all-references
 * (every word of the element is a reference) or a per-element bitmap.
 * Everything else becomes DESC_TYPE_COMPLEX_ARR with the element bitmap
 * stored through alloc_complex_descriptor ().
 *
 * The bit test uses a gsize-wide 1: shifting a plain int by 32 or more is
 * undefined, so bits at index 32 and above of a 64 bit bitmap word could be
 * misread with an int operand.
 */
861 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
863 int first_set = -1, num_set = 0, last_set = -1, i;
864 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
865 for (i = 0; i < numbits; ++i) {
866 if (elem_bitmap [i / GC_BITS_PER_WORD] & ((gsize)1 << (i % GC_BITS_PER_WORD))) {
873 if (elem_size <= MAX_ELEMENT_SIZE) {
874 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
876 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
878 /* Note: we also handle structs with just ref fields */
879 if (num_set * sizeof (gpointer) == elem_size) {
880 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((gssize)(-1) << 16));
882 /* FIXME: try run-len first */
883 /* Note: we can't skip the object header here, because it's not present */
884 if (last_set <= SMALL_BITMAP_SIZE) {
885 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
888 /* it's an array of complex structs ... */
889 desc = DESC_TYPE_COMPLEX_ARR;
890 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
894 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
/*
 * STRING_SIZE: store in @size the byte size of the string object @str:
 * sizeof (MonoString) plus two bytes for each of (string length + 1)
 * characters, rounded up to a multiple of ALLOC_ALIGN.
 */
895 #define STRING_SIZE(size,str) do { \
896 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
897 (size) += (ALLOC_ALIGN - 1); \
898 (size) &= ~(ALLOC_ALIGN - 1); \
/*
 * OBJ_RUN_LEN_SIZE: the object size is read straight from the descriptor
 * by masking @desc with 0xfff8; @obj is not used in the visible line.
 */
901 #define OBJ_RUN_LEN_SIZE(size,desc,obj) do { \
902 (size) = (desc) & 0xfff8; \
/*
 * OBJ_BITMAP_SIZE: same extraction as OBJ_RUN_LEN_SIZE, the size is the
 * part of @desc selected by the mask 0xfff8; @obj is not used in the visible line.
 */
905 #define OBJ_BITMAP_SIZE(size,desc,obj) do { \
906 (size) = (desc) & 0xfff8; \
/*
 * PREFETCH currently expands to nothing; the commented-out definition
 * below is an inline-asm prefetchnta variant kept for reference.
 */
909 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
910 #define PREFETCH(addr)
912 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
/*
 * OBJ_RUN_LEN_FOREACH_PTR: visit the reference slots of a run-length
 * described object. Nothing is done when bits 16-31 of @desc are all zero.
 * Otherwise the run starts (desc >> 16) & 0xff pointer-sized words after
 * @obj and spans (desc >> 24) & 0xff slots; HANDLE_PTR (slot, obj) is
 * invoked for the slots of that run.
 */
913 #define OBJ_RUN_LEN_FOREACH_PTR(desc,obj) do { \
914 if ((desc) & 0xffff0000) { \
915 /* there are pointers */ \
916 void **_objptr_end; \
917 void **_objptr = (void**)(obj); \
918 _objptr += ((desc) >> 16) & 0xff; \
919 _objptr_end = _objptr + (((desc) >> 24) & 0xff); \
920 while (_objptr < _objptr_end) { \
921 HANDLE_PTR (_objptr, (obj)); \
927 /* a bitmap desc means that there are pointer references or we'd have
928 * chosen run-length, instead: add an assert to check.
/*
 * OBJ_BITMAP_FOREACH_PTR: visit the reference slots of a small-bitmap
 * described object. The bitmap is taken from @desc >> 16 and scanning
 * starts OBJECT_HEADER_WORDS words past @obj; HANDLE_PTR (slot, obj) is
 * invoked from the loop over the bitmap (loop control is not visible here).
 */
930 #define OBJ_BITMAP_FOREACH_PTR(desc,obj) do { \
931 /* there are pointers */ \
932 void **_objptr = (void**)(obj); \
933 gsize _bmap = (desc) >> 16; \
934 _objptr += OBJECT_HEADER_WORDS; \
937 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_LARGE_BITMAP_FOREACH_PTR: like OBJ_BITMAP_FOREACH_PTR, but the bitmap
 * is everything above the low type bits of (vt)->desc
 * ((vt)->desc >> LOW_TYPE_BITS). Scanning starts OBJECT_HEADER_WORDS words
 * past @obj and HANDLE_PTR (slot, obj) is invoked from the loop over the
 * bitmap (loop control is not visible here).
 */
944 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
945 /* there are pointers */ \
946 void **_objptr = (void**)(obj); \
947 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
948 _objptr += OBJECT_HEADER_WORDS; \
951 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_COMPLEX_FOREACH_PTR: visit the reference slots of an object whose
 * bitmap lives in the complex_descriptors table.
 * (vt)->desc >> LOW_TYPE_BITS is the index of the table entry; the first
 * word of the entry is its length, so bwords = length - 1 bitmap words
 * are consumed. For every bitmap word the slot pointer restarts at
 * start_run, and start_run then advances by GC_BITS_PER_WORD slots.
 * The g_print line is debugging output.
 */
958 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
959 /* there are pointers */ \
960 void **_objptr = (void**)(obj); \
961 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
962 int bwords = (*bitmap_data) - 1; \
963 void **start_run = _objptr; \
966 MonoObject *myobj = (MonoObject*)obj; \
967 g_print ("found %d at %p (0x%zx): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
969 while (bwords-- > 0) { \
970 gsize _bmap = *bitmap_data++; \
971 _objptr = start_run; \
972 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
975 HANDLE_PTR (_objptr, (obj)); \
980 start_run += GC_BITS_PER_WORD; \
984 /* this one is untested */
/*
 * OBJ_COMPLEX_ARR_FOREACH_PTR: visit the reference slots of an array whose
 * element bitmap lives in the complex_descriptors table.
 * The element size comes from mono_array_element_size () on the object's
 * class, the elements span from the MonoArray vector field up to
 * el_size * mono_array_length () bytes further. For each element the
 * mbwords bitmap words of the table entry are replayed from the start and
 * HANDLE_PTR (slot, obj) is invoked for slots whose bitmap bit is set.
 * NOTE(review): the debugging line casts a variable named start, not the
 * macro argument obj, so it depends on the expansion site - confirm.
 */
985 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
986 /* there are pointers */ \
987 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
988 int mbwords = (*mbitmap_data++) - 1; \
989 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
990 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
991 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
993 MonoObject *myobj = (MonoObject*)start; \
994 g_print ("found %d at %p (0x%zx): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
996 while (e_start < e_end) { \
997 void **_objptr = (void**)e_start; \
998 gsize *bitmap_data = mbitmap_data; \
999 unsigned int bwords = mbwords; \
1000 while (bwords-- > 0) { \
1001 gsize _bmap = *bitmap_data++; \
1002 void **start_run = _objptr; \
1003 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
1005 if ((_bmap & 1)) { \
1006 HANDLE_PTR (_objptr, (obj)); \
1011 _objptr = start_run + GC_BITS_PER_WORD; \
1013 e_start += el_size; \
/*
 * OBJ_VECTOR_FOREACH_PTR: visit the reference slots of a vector/array whose
 * element layout is encoded directly in (vt)->desc.
 * Nothing is done unless some bit selected by 0xffffc000 is set.
 * The element size is ((vt)->desc >> 3) & MAX_ELEMENT_SIZE and the subtype
 * is held in the bits selected by 0xc000 (values are compared shifted by 14):
 *  - DESC_TYPE_V_REFS: every pointer-sized word of the element data is
 *    passed to HANDLE_PTR;
 *  - DESC_TYPE_V_RUN_LEN: for each element, num_refs slots are visited
 *    (offset is read from the descriptor; its use is not visible here);
 *  - DESC_TYPE_V_BITMAP: for each element the bitmap (vt)->desc >> 16 is
 *    replayed and slots with a set bit are passed to HANDLE_PTR.
 * Element data starts at the MonoArray vector field in all three cases.
 */
1017 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
1018 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
1019 if ((vt)->desc & 0xffffc000) { \
1020 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
1021 /* there are pointers */ \
1022 int etype = (vt)->desc & 0xc000; \
1023 if (etype == (DESC_TYPE_V_REFS << 14)) { \
1024 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
1025 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
1026 /* Note: this code can handle also arrays of struct with only references in them */ \
1027 while (p < end_refs) { \
1028 HANDLE_PTR (p, (obj)); \
1031 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
1032 int offset = ((vt)->desc >> 16) & 0xff; \
1033 int num_refs = ((vt)->desc >> 24) & 0xff; \
1034 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1035 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1036 while (e_start < e_end) { \
1037 void **p = (void**)e_start; \
1040 for (i = 0; i < num_refs; ++i) { \
1041 HANDLE_PTR (p + i, (obj)); \
1043 e_start += el_size; \
1045 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
1046 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
1047 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
1048 while (e_start < e_end) { \
1049 void **p = (void**)e_start; \
1050 gsize _bmap = (vt)->desc >> 16; \
1051 /* Note: there is no object header here to skip */ \
1053 if ((_bmap & 1)) { \
1054 HANDLE_PTR (p, (obj)); \
1059 e_start += el_size; \
/*
 * Statistics counters: reset at the start of scan_area () and incremented
 * by the HANDLE_PTR definition that follows.
 */
1065 static mword new_obj_references = 0;
1066 static mword obj_references_checked = 0;
/*
 * Counting variant of HANDLE_PTR: a slot whose value is non-NULL and lies
 * in [nursery_start, nursery_next) bumps new_obj_references;
 * obj_references_checked is bumped as well. Nothing is written to the slot.
 */
1069 #define HANDLE_PTR(ptr,obj) do { \
1070 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1071 new_obj_references++; \
1072 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1074 obj_references_checked++; \
1079 * ######################################################################
1080 * ######## Detecting and removing garbage.
1081 * ######################################################################
1082 * This section of code deals with detecting the objects no longer in use
1083 * and reclaiming the memory.
/*
 * Walk the objects laid out between @start and @end.
 * The two reference counters are reset first. A NULL first word means there
 * is no object at that position and the cursor moves on by one pointer.
 * Otherwise the vtable is loaded, the object size (skip_size) is computed
 * according to the descriptor type and the matching *_FOREACH_PTR macro is
 * run with whatever HANDLE_PTR is defined at this point of the file.
 * Sizes come from the descriptor (run-length, small bitmap), from the
 * string length, from element size times array length plus
 * sizeof (MonoArray), or from safe_object_get_size (); all are rounded up
 * to ALLOC_ALIGN.
 * NOTE(review): the DESC_TYPE_ARRAY tests nested inside the
 * DESC_TYPE_VECTOR and DESC_TYPE_COMPLEX_ARR branches look unreachable
 * unless elided code makes those type values coincide - confirm.
 */
1085 static void __attribute__((noinline))
1086 scan_area (char *start, char *end)
1091 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1093 new_obj_references = 0;
1094 obj_references_checked = 0;
1095 while (start < end) {
1096 if (!*(void**)start) {
1097 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1100 vt = (GCVTable*)LOAD_VTABLE (start);
1101 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1103 MonoObject *obj = (MonoObject*)start;
1104 g_print ("found at %p (0x%zx): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1108 if (type == DESC_TYPE_STRING) {
1109 STRING_SIZE (skip_size, start);
1113 } else if (type == DESC_TYPE_RUN_LENGTH) {
1114 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1115 g_assert (skip_size);
1116 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1120 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1121 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1122 skip_size *= mono_array_length ((MonoArray*)start);
1123 skip_size += sizeof (MonoArray);
1124 skip_size += (ALLOC_ALIGN - 1);
1125 skip_size &= ~(ALLOC_ALIGN - 1);
1126 OBJ_VECTOR_FOREACH_PTR (vt, start);
1127 if (type == DESC_TYPE_ARRAY) {
1128 /* account for the bounds */
1133 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1134 OBJ_BITMAP_SIZE (skip_size, desc, start);
1135 g_assert (skip_size);
1136 OBJ_BITMAP_FOREACH_PTR (desc,start);
1140 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1141 skip_size = safe_object_get_size ((MonoObject*)start);
1142 skip_size += (ALLOC_ALIGN - 1);
1143 skip_size &= ~(ALLOC_ALIGN - 1);
1144 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1148 } else if (type == DESC_TYPE_COMPLEX) {
1149 /* this is a complex object */
1150 skip_size = safe_object_get_size ((MonoObject*)start);
1151 skip_size += (ALLOC_ALIGN - 1);
1152 skip_size &= ~(ALLOC_ALIGN - 1);
1153 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1157 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1158 /* this is an array of complex structs */
1159 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1160 skip_size *= mono_array_length ((MonoArray*)start);
1161 skip_size += sizeof (MonoArray);
1162 skip_size += (ALLOC_ALIGN - 1);
1163 skip_size &= ~(ALLOC_ALIGN - 1);
1164 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1165 if (type == DESC_TYPE_ARRAY) {
1166 /* account for the bounds */
1175 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1176 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1177 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
/*
 * Walk the objects laid out between @start and @end and clear those that
 * belong to @domain.
 * An object is selected when mono_object_domain () equals @domain and its
 * class is not mono_defaults.thread_class; such objects are logged with
 * "Need to cleanup object". The object size is computed per descriptor
 * type exactly as in scan_area (), and when the remove flag is set the
 * whole object is overwritten with zeroes via memset ().
 * A NULL first word means there is no object at that position and the
 * cursor moves on by one pointer.
 * NOTE(review): the DESC_TYPE_ARRAY tests nested inside the
 * DESC_TYPE_VECTOR and DESC_TYPE_COMPLEX_ARR branches look unreachable
 * unless elided code makes those type values coincide - confirm.
 */
1180 static void __attribute__((noinline))
1181 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1188 while (start < end) {
1189 if (!*(void**)start) {
1190 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1193 vt = (GCVTable*)LOAD_VTABLE (start);
1194 /* handle threads someway (maybe insert the root domain vtable?) */
1195 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1196 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1203 if (type == DESC_TYPE_STRING) {
1204 STRING_SIZE (skip_size, start);
1205 if (remove) memset (start, 0, skip_size);
1208 } else if (type == DESC_TYPE_RUN_LENGTH) {
1209 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1210 g_assert (skip_size);
1211 if (remove) memset (start, 0, skip_size);
1214 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1215 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1216 skip_size *= mono_array_length ((MonoArray*)start);
1217 skip_size += sizeof (MonoArray);
1218 skip_size += (ALLOC_ALIGN - 1);
1219 skip_size &= ~(ALLOC_ALIGN - 1);
1220 if (type == DESC_TYPE_ARRAY) {
1221 /* account for the bounds */
1223 if (remove) memset (start, 0, skip_size);
1226 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1227 OBJ_BITMAP_SIZE (skip_size, desc, start);
1228 g_assert (skip_size);
1229 if (remove) memset (start, 0, skip_size);
1232 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1233 skip_size = safe_object_get_size ((MonoObject*)start);
1234 skip_size += (ALLOC_ALIGN - 1);
1235 skip_size &= ~(ALLOC_ALIGN - 1);
1236 if (remove) memset (start, 0, skip_size);
1239 } else if (type == DESC_TYPE_COMPLEX) {
1240 /* this is a complex object */
1241 skip_size = safe_object_get_size ((MonoObject*)start);
1242 skip_size += (ALLOC_ALIGN - 1);
1243 skip_size &= ~(ALLOC_ALIGN - 1);
1244 if (remove) memset (start, 0, skip_size);
1247 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1248 /* this is an array of complex structs */
1249 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1250 skip_size *= mono_array_length ((MonoArray*)start);
1251 skip_size += sizeof (MonoArray);
1252 skip_size += (ALLOC_ALIGN - 1);
1253 skip_size &= ~(ALLOC_ALIGN - 1);
1254 if (type == DESC_TYPE_ARRAY) {
1255 /* account for the bounds */
1257 if (remove) memset (start, 0, skip_size);
1267 * When appdomains are unloaded we can easily remove objects that have finalizers,
1268 * but all the others could still be present in random places on the heap.
1269 * We need a sweep to get rid of them even though it's going to be costly
1271 * The reason we need to remove them is because we access the vtable and class
1272 * structures to know the object size and the reference bitmap: once the domain is
1273 * unloaded they point to random memory.
/*
 * Run scan_area_for_domain () over the data range (data .. end_data) of
 * every section on section_list. Big and fixed objects are not covered
 * yet, as the FIXME at the end records.
 */
1276 mono_gc_clear_domain (MonoDomain * domain)
1278 GCMemSection *section;
1280 for (section = section_list; section; section = section->next) {
1281 scan_area_for_domain (domain, section->data, section->end_data);
1283 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
/*
 * Record the location @ptr in the global remembered set.
 * When the current global_remset buffer still has room the value is
 * appended at store_next. Otherwise a new remset with the same number of
 * entries is obtained from alloc_remset (), the current one is linked
 * behind it through rs->next, and @ptr is then appended through
 * global_remset->store_next.
 */
1288 add_to_global_remset (gpointer ptr)
1291 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1292 if (global_remset->store_next < global_remset->end_set) {
1293 *(global_remset->store_next++) = (mword)ptr;
1296 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1297 rs->next = global_remset;
1299 *(global_remset->store_next++) = (mword)ptr;
1303 * This is how the copying happens from the nursery to the old generation.
1304 * We assume that at this time all the pinned objects have been identified and
1306 * We run scan_object() for each pinned object so that each referenced
1307 * object is copied if possible. The new gray objects created can have
1308 * scan_object() run on them right away, too.
1309 * Then we run copy_object() for the precisely tracked roots. At this point
1310 * all the roots are either gray or black. We run scan_object() on the gray
1311 * objects until no more gray objects are created.
1312 * At the end of the process we walk again the pinned list and we unmark
1313 * the pinned flag. As we go we also create the list of free space for use
1314 * in the next allocation runs.
1316 * We need to remember objects from the old generation that point to the new one
1317 * (or just addresses?).
1319 * copy_object could be made into a macro once debugged (use inline for now).
/*
 * Evacuate @obj when it lies in [from_space_start, from_space_end) and
 * outside [to_space, to_space_end). Callers store the result back into the
 * slot that referenced @obj.
 * Objects that are already forwarded, objects that are pinned, objects of
 * MAX_SMALL_OBJ_SIZE bytes or more and objects from the pinned-alloc chunks
 * are not moved. Any other object is copied to gray_objects (objsize is
 * the size from safe_object_get_size () rounded up to ALLOC_ALIGN), the
 * bounds pointer of arrays with bounds is rebased onto the copy, a
 * forwarding pointer is installed in the old object, a scan start is
 * recorded in to_space_section and gray_objects is advanced by objsize.
 */
1322 static char* __attribute__((noinline))
1323 copy_object (char *obj, char *from_space_start, char *from_space_end)
1325 if (obj >= from_space_start && obj < from_space_end && (obj < to_space || obj >= to_space_end)) {
1329 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1330 if ((forwarded = object_is_forwarded (obj))) {
1331 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1332 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1335 if (object_is_pinned (obj)) {
1336 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1337 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1340 objsize = safe_object_get_size ((MonoObject*)obj);
1341 objsize += ALLOC_ALIGN - 1;
1342 objsize &= ~(ALLOC_ALIGN - 1);
1343 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %zd)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1344 /* FIXME: handle pinned allocs:
1345 * Large objects are simple, at least until we always follow the rule:
1346 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1347 * At the end of major collections, we walk the los list and if
1348 * the object is pinned, it is marked, otherwise it can be freed.
1350 if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) {
1351 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %zd\n", obj, safe_name (obj), objsize));
1355 /* ok, the object is not pinned, we can move it */
1356 /* use an optimized memcpy here */
1361 char* edi = gray_objects;
1362 __asm__ __volatile__(
1364 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1365 : "0" (objsize/4), "1" (edi),"2" (esi)
1370 memcpy (gray_objects, obj, objsize);
1372 /* adjust array->bounds */
1373 vt = ((MonoObject*)obj)->vtable;
1374 g_assert (vt->gc_descr);
1375 if (vt->rank && ((MonoArray*)obj)->bounds) {
1376 MonoArray *array = (MonoArray*)gray_objects;
1377 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1378 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %zd, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1380 /* set the forwarding pointer */
1381 forward_object (obj, gray_objects);
1383 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1384 gray_objects += objsize;
1385 DEBUG (8, g_assert (gray_objects <= to_space_end));
/*
 * Copying variant of HANDLE_PTR: the slot is overwritten with the result
 * of copy_object (), using the from_start and from_end variables of the
 * expansion site. When the new value still lies in [from_start, from_end)
 * the slot address is recorded with add_to_global_remset ().
 */
1392 #define HANDLE_PTR(ptr,obj) do { \
1394 void *__old = *(ptr); \
1395 *(ptr) = copy_object (*(ptr), from_start, from_end); \
1396 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1397 if (G_UNLIKELY (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end)) \
1398 add_to_global_remset ((ptr)); \
1403 * Scan the object pointed to by @start for references to
1404 * other objects between @from_start and @from_end and copy
1405 * them to the gray_objects area.
1406 * Returns a pointer to the end of the object.
/*
 * Dispatch on the low three bits of the object's descriptor: run the
 * matching *_FOREACH_PTR macro (strings have no references to visit),
 * compute the object size and return @start + size.
 * Run-length and small-bitmap objects take their size from the descriptor,
 * strings from STRING_SIZE, everything else from safe_object_get_size ()
 * rounded up to ALLOC_ALIGN. An unknown type hits g_assert_not_reached ().
 * NOTE(review): in the array cases skip_size is assigned from
 * safe_object_get_size () and, in the lines visible here, then recomputed
 * from element size and length; elided preprocessor lines may disable one
 * of the two computations - confirm.
 */
1409 scan_object (char *start, char* from_start, char* from_end)
1415 vt = (GCVTable*)LOAD_VTABLE (start);
1416 //type = vt->desc & 0x7;
1418 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1420 switch (desc & 0x7) {
1421 //if (type == DESC_TYPE_STRING) {
1422 case DESC_TYPE_STRING:
1423 STRING_SIZE (skip_size, start);
1424 return start + skip_size;
1425 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1426 case DESC_TYPE_RUN_LENGTH:
1427 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1428 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1429 g_assert (skip_size);
1430 return start + skip_size;
1431 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1432 case DESC_TYPE_ARRAY:
1433 case DESC_TYPE_VECTOR:
1434 OBJ_VECTOR_FOREACH_PTR (vt, start);
1435 skip_size = safe_object_get_size ((MonoObject*)start);
1437 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1438 skip_size *= mono_array_length ((MonoArray*)start);
1439 skip_size += sizeof (MonoArray);
1441 skip_size += (ALLOC_ALIGN - 1);
1442 skip_size &= ~(ALLOC_ALIGN - 1);
1443 return start + skip_size;
1444 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1445 case DESC_TYPE_SMALL_BITMAP:
1446 OBJ_BITMAP_FOREACH_PTR (desc,start);
1447 OBJ_BITMAP_SIZE (skip_size, desc, start);
1448 return start + skip_size;
1449 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1450 case DESC_TYPE_LARGE_BITMAP:
1451 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1452 skip_size = safe_object_get_size ((MonoObject*)start);
1453 skip_size += (ALLOC_ALIGN - 1);
1454 skip_size &= ~(ALLOC_ALIGN - 1);
1455 return start + skip_size;
1456 //} else if (type == DESC_TYPE_COMPLEX) {
1457 case DESC_TYPE_COMPLEX:
1458 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1459 /* this is a complex object */
1460 skip_size = safe_object_get_size ((MonoObject*)start);
1461 skip_size += (ALLOC_ALIGN - 1);
1462 skip_size &= ~(ALLOC_ALIGN - 1);
1463 return start + skip_size;
1464 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1465 case DESC_TYPE_COMPLEX_ARR:
1466 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1467 /* this is an array of complex structs */
1468 skip_size = safe_object_get_size ((MonoObject*)start);
1470 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1471 skip_size *= mono_array_length ((MonoArray*)start);
1472 skip_size += sizeof (MonoArray);
1474 skip_size += (ALLOC_ALIGN - 1);
1475 skip_size &= ~(ALLOC_ALIGN - 1);
1476 return start + skip_size;
1478 g_assert_not_reached ();
1485 * Scan the valuetype pointed to by START, described by DESC for references to
1486 * other objects between @from_start and @from_end and copy them to the gray_objects area.
1487 * Returns a pointer to the end of the object.
/*
 * @start is moved back by sizeof (MonoObject) before scanning because the
 * descriptor @desc also accounts for the object header.
 * Only run-length and small-bitmap descriptors are scanned; both take the
 * size from the descriptor and return @start + size (relative to the
 * adjusted @start). Large-bitmap, complex and all remaining descriptor
 * types end in g_assert_not_reached ().
 */
1490 scan_vtype (char *start, mword desc, char* from_start, char* from_end)
1494 /* The descriptors include info about the MonoObject header as well */
1495 start -= sizeof (MonoObject);
1497 switch (desc & 0x7) {
1498 case DESC_TYPE_RUN_LENGTH:
1499 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
1500 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
1501 g_assert (skip_size);
1502 return start + skip_size;
1503 case DESC_TYPE_SMALL_BITMAP:
1504 OBJ_BITMAP_FOREACH_PTR (desc,start);
1505 OBJ_BITMAP_SIZE (skip_size, desc, start);
1506 return start + skip_size;
1507 case DESC_TYPE_LARGE_BITMAP:
1508 case DESC_TYPE_COMPLEX:
1510 g_assert_not_reached ();
1513 // The other descriptors can't happen with vtypes
1514 g_assert_not_reached ();
1521 * Addresses from start to end are already sorted. This function finds the object header
1522 * for each address and pins the object. The addresses must be inside the passed section.
1523 * Return the number of pinned objects.
/*
 * For each candidate address in the sorted range @start .. @end that lies
 * in [start_nursery, end_nursery) and differs from the previous one:
 *  - skip it when it falls inside the object examined last;
 *  - otherwise pick a search start from @section->scan_starts (falling
 *    back to @start_nursery, and never before the end of the last object)
 *    and walk forward object by object, stepping over zeroed words, until
 *    the object containing the address is found or passed;
 *  - a containing object is pinned with pin_object () and its start is
 *    written into the front of the input array (definitely_pinned [count]),
 *    so the array is reused for the result.
 */
1526 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1531 void *last_obj = NULL;
1532 size_t last_obj_size = 0;
1535 void **definitely_pinned = start;
1536 while (start < end) {
1538 /* the range check should be redundant */
1539 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1540 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1541 /* multiple pointers to the same object */
1542 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
1546 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1547 search_start = (void*)section->scan_starts [idx];
1548 if (!search_start || search_start > addr) {
1551 search_start = section->scan_starts [idx];
1552 if (search_start && search_start <= addr)
1555 if (!search_start || search_start > addr)
1556 search_start = start_nursery;
1558 if (search_start < last_obj)
1559 search_start = (char*)last_obj + last_obj_size;
1560 /* now addr should be in an object a short distance from search_start
1561 * Note that search_start must point to zeroed mem or point to an object.
1564 if (!*(void**)search_start) {
1565 mword p = (mword)search_start;
1566 p += sizeof (gpointer);
1567 p += ALLOC_ALIGN - 1;
1568 p &= ~(ALLOC_ALIGN - 1);
1569 search_start = (void*)p;
1572 last_obj = search_start;
1573 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1574 last_obj_size += ALLOC_ALIGN - 1;
1575 last_obj_size &= ~(ALLOC_ALIGN - 1);
1576 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %zd\n", last_obj, safe_name (last_obj), last_obj_size));
1577 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1578 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1579 pin_object (search_start);
1580 definitely_pinned [count] = search_start;
1584 /* skip to the next object */
1585 search_start = (void*)((char*)search_start + last_obj_size);
1586 } while (search_start <= addr);
1587 /* we either pinned the correct object or we ignored the addr because
1588 * it points to unused zeroed memory.
1594 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1598 static void** pin_queue;
1599 static int pin_queue_size = 0;
1600 static int next_pin_slot = 0;
1605 gap = (gap * 10) / 13;
1606 if (gap == 9 || gap == 10)
1615 compare_addr (const void *a, const void *b)
1617 return *(const void **)a - *(const void **)b;
1621 /* sort the addresses in array in increasing order */
/*
 * In-place sort of the @size pointers in @array.
 * Each pass shrinks the gap with new_gap () and swaps array [i] and
 * array [j] when they are out of order; the loop ends once a pass with
 * gap == 1 performs no swap.
 */
1623 sort_addresses (void **array, int size)
1626 * qsort is slower as predicted.
1627 * qsort (array, size, sizeof (gpointer), compare_addr);
1634 gap = new_gap (gap);
1637 for (i = 0; i < end; i++) {
1639 if (array [i] > array [j]) {
1640 void* val = array [i];
1641 array [i] = array [j];
1646 if (gap == 1 && !swapped)
/*
 * Debugging helper: writes "Nursery range" lines to gc_debug_file, one per
 * pin_queue entry plus a final one after the loop, each showing the
 * first..next address pair and its size in bytes. The first range starts
 * at @start_nursery.
 */
1652 print_nursery_gaps (void* start_nursery, void *end_nursery)
1655 gpointer first = start_nursery;
1657 for (i = 0; i < next_pin_slot; ++i) {
1658 next = pin_queue [i];
1659 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1663 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %zd\n", first, next, (char*)next-(char*)first);
1666 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
/*
 * Only the entries from @start_slot up to next_pin_slot are sorted and
 * compacted; next_pin_slot is then lowered to the end of the compacted run.
 * In the duplicate-skipping loop the bound (cur < end) is tested before
 * *cur is read, so the slot at pin_queue [next_pin_slot], which holds no
 * valid entry and lies past the allocation when the queue is full, is
 * never dereferenced.
 */
1668 optimize_pin_queue (int start_slot)
1670 void **start, **cur, **end;
1671 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1672 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1673 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1674 if ((next_pin_slot - start_slot) > 1)
1675 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1676 start = cur = pin_queue + start_slot;
1677 end = pin_queue + next_pin_slot;
1680 while (cur < end && *start == *cur)
1684 next_pin_slot = start - pin_queue;
1685 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1686 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
/*
 * Grow the pin queue: the first allocation holds 1024 entries, later ones
 * grow the capacity by half. The next_pin_slot entries in use are copied
 * into a buffer from get_internal_mem () and the old buffer is released
 * with free_internal_mem ().
 * NOTE(review): on the first call pin_queue is still NULL, so this relies
 * on free_internal_mem () accepting NULL - confirm.
 */
1691 realloc_pin_queue (void)
1693 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1694 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1695 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1696 free_internal_mem (pin_queue);
1697 pin_queue = new_pin;
1698 pin_queue_size = new_size;
1699 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1703 * Scan the memory between start and end and queue values which could be pointers
1704 * to the area between start_nursery and end_nursery for later consideration.
1705 * Typically used for thread stacks.
/*
 * Every word between @start and @end whose value lies in
 * [start_nursery, end_nursery) is treated as a potential pointer: it is
 * rounded down to a multiple of ALLOC_ALIGN and appended to pin_queue,
 * which is grown with realloc_pin_queue () when full. No object is pinned
 * here; the queue is processed later.
 * When HAVE_VALGRIND_MEMCHECK_H is defined the used part of the queue is
 * marked as defined for valgrind.
 */
1708 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1711 while (start < end) {
1712 if (*start >= start_nursery && *start < end_nursery) {
1714 * *start can point to the middle of an object
1715 * note: should we handle pointing at the end of an object?
1716 * pinning in C# code disallows pointing at the end of an object
1717 * but there is some small chance that an optimizing C compiler
1718 * may keep the only reference to an object by pointing
1719 * at the end of it. We ignore this small chance for now.
1720 * Pointers to the end of an object are indistinguishable
1721 * from pointers to the start of the next object in memory
1722 * so if we allow that we'd need to pin two objects...
1723 * We queue the pointer in an array, the
1724 * array will then be sorted and uniqued. This way
1725 * we can coalesce several pinning pointers and it should
1726 * be faster since we'd do a memory scan with increasing
1727 * addresses. Note: we can align the address to the allocation
1728 * alignment, so the unique process is more effective.
1730 mword addr = (mword)*start;
1731 addr &= ~(ALLOC_ALIGN - 1);
1732 if (next_pin_slot >= pin_queue_size)
1733 realloc_pin_queue ();
1734 pin_queue [next_pin_slot++] = (void*)addr;
1735 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1740 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1742 #ifdef HAVE_VALGRIND_MEMCHECK_H
1744 * The pinning addresses might come from undefined memory, this is normal. Since they
1745 * are used in lots of functions, we make the memory defined here instead of having
1746 * to add a supression for those functions.
1748 VALGRIND_MAKE_MEM_DEFINED (pin_queue, next_pin_slot * sizeof (pin_queue [0]));
1753 * If generation is 0, just mark objects in the nursery, the others we don't care,
1754 * since they are not going to move anyway.
1755 * There are different areas that are scanned for pinned pointers:
1756 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1757 * *) the pinned handle table
1758 * *) the pinned roots
1760 * Note: when we'll use a write barrier for old to new gen references, we need to
1761 * keep track of old gen objects that point to pinned new gen objects because in that
1762 * case the referenced object will be moved maybe at the next collection, but there
1763 * is no write in the old generation area where the pinned object is referenced
1764 * and we may not consider it as reachable.
1767 mark_pinned_objects (int generation)
1772 * Debugging function: find in the conservative roots where @obj is being pinned.
/*
 * Walk every root record in roots_hash that has no descriptor (i.e. is
 * scanned conservatively) and log each slot whose value points into
 * [@obj, @obj + @size). Afterwards the thread data is checked through
 * find_pinning_ref_from_thread ().
 */
1775 find_pinning_reference (char *obj, size_t size)
1779 char *endobj = obj + size;
1780 for (i = 0; i < roots_hash_size; ++i) {
1781 for (root = roots_hash [i]; root; root = root->next) {
1782 /* if desc is non-null it has precise info */
1783 if (!root->root_desc) {
1784 char ** start = (char**)root->start_root;
1785 while (start < (char**)root->end_root) {
1786 if (*start >= obj && *start < endobj) {
1787 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1794 find_pinning_ref_from_thread (obj, size);
1798 * The first thing we do in a collection is to identify pinned objects.
1799 * This function considers all the areas of memory that need to be
1800 * conservatively scanned.
/*
 * Queue pinning candidates for the range [@start_nursery, @end_nursery):
 * root records in roots_hash are tested for a descriptor, and the
 * start_root .. end_root range of a record is scanned with
 * conservatively_pin_objects_from (); per the comment in the loop, records
 * with a descriptor carry precise info. Thread data is then handled by
 * pin_thread_data ().
 */
1803 pin_from_roots (void *start_nursery, void *end_nursery)
1807 DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries));
1808 /* objects pinned from the API are inside these roots */
1809 for (i = 0; i < roots_hash_size; ++i) {
1810 for (root = roots_hash [i]; root; root = root->next) {
1811 /* if desc is non-null it has precise info */
1812 if (root->root_desc)
1814 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1815 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1818 /* now deal with the thread stacks
1819 * in the future we should be able to conservatively scan only:
1820 * *) the cpu registers
1821 * *) the unmanaged stack frames
1822 * *) the _last_ managed stack frame
1823 * *) pointers slots in managed frames
1825 pin_thread_data (start_nursery, end_nursery);
1829 * The memory area from start_root to end_root contains pointers to objects.
1830 * Their position is precisely described by @desc (this means that the pointer
1831 * can be either NULL or the pointer to the start of an object).
1832 * This function copies them to to_space and updates them.
/*
 * The kind of root descriptor is selected by @desc & ROOT_DESC_TYPE_MASK.
 * For ROOT_DESC_BITMAP the bitmap is @desc shifted right by
 * ROOT_DESC_TYPE_SHIFT; a slot whose bit is set and whose value is
 * non-NULL is replaced by the result of copy_object () for the range
 * [@n_start, @n_end). ROOT_DESC_RUN_LEN and ROOT_DESC_LARGE_BITMAP are not
 * implemented and end in g_assert_not_reached ().
 * The backslash after the DEBUG statement is a plain line continuation.
 */
1835 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1837 switch (desc & ROOT_DESC_TYPE_MASK) {
1838 case ROOT_DESC_BITMAP:
1839 desc >>= ROOT_DESC_TYPE_SHIFT;
1841 if ((desc & 1) && *start_root) {
1842 *start_root = copy_object (*start_root, n_start, n_end);
1843 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \
1849 case ROOT_DESC_RUN_LEN:
1850 case ROOT_DESC_LARGE_BITMAP:
1851 g_assert_not_reached ();
// alloc_fragment: obtain a Fragment record.
// The head of fragment_freelist is looked at first and unlinked
// (fragment_freelist = frag->next); a record is otherwise obtained from
// get_internal_mem (sizeof (Fragment)).
// NOTE(review): presumably the freelist path is taken only when the list is
// non-empty and the record is returned to the caller -- confirm.
1856 alloc_fragment (void)
1858 Fragment *frag = fragment_freelist;
1860 fragment_freelist = frag->next;
1864 frag = get_internal_mem (sizeof (Fragment));
1870 * Allocate and setup the data structures needed to be able to allocate objects
1871 * in the nursery. The nursery is stored in nursery_section.
// alloc_nursery: set up the nursery and its GCMemSection.
// Steps visible below:
//   - nursery_section is tested first (presumably an early exit when the
//     nursery already exists -- confirm);
//   - nursery_size bytes are obtained with get_os_memory (); nursery_start and
//     nursery_next point at the start, nursery_real_end at the end;
//   - heap boundaries and total_alloc are updated;
//   - the section gets data/next_data/size/end_data, one scan-start slot per
//     SCAN_START_SIZE bytes, and role MEMORY_ROLE_GEN0;
//   - the section is pushed on section_list and stored in nursery_section;
//   - one Fragment spanning nursery_start..nursery_real_end is filled in and
//     nursery_frag_real_end is set to nursery_real_end.
// Known gaps flagged by the existing FIXMEs: no OOM handling, and the
// fragment record is not kept.
1874 alloc_nursery (void)
1876 GCMemSection *section;
1881 if (nursery_section)
1883 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %zd\n", nursery_size));
1884 /* later we will alloc a larger area for the nursery but only activate
1885 * what we need. The rest will be used as expansion if we have too many pinned
1886 * objects in the existing nursery.
1888 /* FIXME: handle OOM */
1889 section = get_internal_mem (sizeof (GCMemSection));
1890 data = get_os_memory (nursery_size, TRUE);
1891 nursery_start = nursery_next = data;
1892 nursery_real_end = data + nursery_size;
1893 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
1894 total_alloc += nursery_size;
1895 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", nursery_size, total_alloc));
1896 section->data = section->next_data = data;
1897 section->size = nursery_size;
1898 section->end_data = nursery_real_end;
1899 scan_starts = nursery_size / SCAN_START_SIZE;
1900 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
1901 section->num_scan_start = scan_starts;
1902 section->role = MEMORY_ROLE_GEN0;
1904 /* add to the section list */
1905 section->next = section_list;
1906 section_list = section;
1908 nursery_section = section;
1910 /* Setup the single first large fragment */
1911 frag = alloc_fragment ();
1912 frag->fragment_start = nursery_start;
1913 frag->fragment_limit = nursery_start;
1914 frag->fragment_end = nursery_real_end;
1915 nursery_frag_real_end = nursery_real_end;
1916 /* FIXME: frag here is lost */
1920 * Update roots in the old generation. Since we currently don't have the
1921 * info from the write barriers, we just scan all the objects.
// scan_old_generation: scan all old-generation data against the address
// range [start, end).
//   - every section on section_list is visited; nursery_section is tested for
//     (presumably skipped -- confirm);
//   - objects up to section->next_data are passed to scan_object (), whose
//     return value becomes the next position to scan;
//   - every entry of los_object_list is scanned with scan_object ();
//   - every entry of fin_ready_list has its object replaced by the result of
//     copy_object ().
// The only call site visible in this part of the file (in drain_gray_stack)
// is commented out.
1924 scan_old_generation (char *start, char* end)
1926 GCMemSection *section;
1928 LOSObject *big_object;
1931 for (section = section_list; section; section = section->next) {
1932 if (section == nursery_section)
1934 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
1935 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
1937 while (p < section->next_data) {
1942 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
1943 p = scan_object (p, start, end);
1946 /* scan the old object space, too */
1947 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1948 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
1949 scan_object (big_object->data, start, end);
1951 /* scan the list of objects ready for finalization */
1952 for (fin = fin_ready_list; fin; fin = fin->next) {
1953 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
1954 fin->object = copy_object (fin->object, start, end);
1958 static mword fragment_total = 0;
1960 * We found a fragment of free memory in the nursery: memzero it and if
1961 * it is big enough, add it to the list of fragments that can be used for
// add_nursery_frag: handle a free nursery range [frag_start, frag_end) of
// frag_size bytes.
//   - when nursery_clear_policy is CLEAR_AT_GC the range is zeroed up front;
//   - a range of at least FRAGMENT_MIN_SIZE bytes is recorded in a Fragment
//     (start and limit both at frag_start), pushed on the head of
//     nursery_fragments, and added to fragment_total;
//   - the final memset is commented "Clear unused fragments, pinning depends
//     on this" (presumably the branch for ranges that are too small -- confirm).
1965 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
1968 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %zd\n", frag_start, frag_end, frag_size));
1969 /* memsetting just the first chunk start is bound to provide better cache locality */
1970 if (nursery_clear_policy == CLEAR_AT_GC)
1971 memset (frag_start, 0, frag_size);
1972 /* Not worth dealing with smaller fragments: need to tune */
1973 if (frag_size >= FRAGMENT_MIN_SIZE) {
1974 fragment = alloc_fragment ();
1975 fragment->fragment_start = frag_start;
1976 fragment->fragment_limit = frag_start;
1977 fragment->fragment_end = frag_end;
1978 fragment->next = nursery_fragments;
1979 nursery_fragments = fragment;
1980 fragment_total += frag_size;
1982 /* Clear unused fragments, pinning depends on this */
1983 memset (frag_start, 0, frag_size);
// scan_needed_big_objects: scan large objects that became reachable.
// Walks los_object_list; an entry that is pinned (object_is_pinned) and not
// yet flagged as scanned is passed to scan_object () with the given address
// range and then flagged scanned = TRUE so it is processed only once.
// NOTE(review): drain_gray_stack () stores this function's result and loops
// while it is non-zero, so it presumably reports how many objects were
// scanned -- confirm.
1988 scan_needed_big_objects (char *start_addr, char *end_addr)
1990 LOSObject *big_object;
1992 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1993 if (!big_object->scanned && object_is_pinned (big_object->data)) {
1994 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %zd\n", big_object->data, safe_name (big_object->data), big_object->size));
1995 scan_object (big_object->data, start_addr, end_addr);
1996 big_object->scanned = TRUE;
// drain_gray_stack: process the objects copied to to_space until no new ones
// appear, then handle finalizable objects and disappearing links.
//   1. gray_start begins at to_space; scan_object () is applied repeatedly,
//      each call returning the next position, until gray_start catches up
//      with gray_objects.
//   2. In a do/while loop: num_ready_finalizers is sampled,
//      finalize_in_range () and scan_needed_big_objects () run, and the gray
//      area is drained again. The loop repeats while the number of ready
//      finalizers changed or big objects were scanned.
//   3. to_space and to_space_section->next_data are advanced to gray_start.
//   4. null_link_in_range () runs last, after the finalization handling.
// start_addr/end_addr delimit the address range being collected and are
// forwarded unchanged to every helper.
2004 drain_gray_stack (char *start_addr, char *end_addr)
2008 int fin_ready, bigo_scanned_num;
2012 * We copied all the reachable objects. Now it's the time to copy
2013 * the objects that were not referenced by the roots, but by the copied objects.
2014 * we built a stack of objects pointed to by gray_start: they are
2015 * additional roots and we may add more items as we go.
2016 * We loop until gray_start == gray_objects which means no more objects have
2017 * been added. Note this is iterative: no recursion is involved.
2018 * We need to walk the LO list as well in search of marked big objects
2019 * (use a flag since this is needed only on major collections). We need to loop
2020 * here as well, so keep a counter of marked LO (increasing it in copy_object).
2023 gray_start = to_space;
2024 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2025 while (gray_start < gray_objects) {
2026 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2027 gray_start = scan_object (gray_start, start_addr, end_addr);
2030 DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2031 //scan_old_generation (start_addr, end_addr);
2032 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
2033 /* walk the finalization queue and move also the objects that need to be
2034 * finalized: use the finalized objects as new roots so the objects they depend
2035 * on are also not reclaimed. As with the roots above, only objects in the nursery
2036 * are marked/copied.
2037 * We need a loop here, since objects ready for finalizers may reference other objects
2038 * that are fin-ready. Speedup with a flag?
2041 fin_ready = num_ready_finalizers;
2042 finalize_in_range (start_addr, end_addr);
2043 bigo_scanned_num = scan_needed_big_objects (start_addr, end_addr);
2045 /* drain the new stack that might have been created */
2046 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
2047 while (gray_start < gray_objects) {
2048 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
2049 gray_start = scan_object (gray_start, start_addr, end_addr);
2051 } while (fin_ready != num_ready_finalizers || bigo_scanned_num);
2053 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
2054 to_space = gray_start;
2055 to_space_section->next_data = to_space;
2058 * handle disappearing links
2059 * Note we do this after checking the finalization queue because if an object
2060 * survives (at least long enough to be finalized) we don't clear the link.
2061 * This also deals with a possible issue with the monitor reclamation: with the Boehm
2062 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
2065 null_link_in_range (start_addr, end_addr);
2067 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2070 static int last_num_pinned = 0;
// build_nursery_fragments: rebuild the nursery free-fragment list from the
// pinned objects in pin_queue [start_pin, end_pin).
//   - the old nursery_fragments list is moved to fragment_freelist (any
//     previous freelist content is overwritten, see the FIXME) and
//     nursery_fragments starts empty;
//   - the nursery scan_starts table is cleared;
//   - for each pinned object: the pin bit is removed, the object is recorded
//     in scan_starts, the gap between the previous position and the object is
//     given to add_nursery_frag (), and frag_start moves past the object
//     using its size rounded up to ALLOC_ALIGN;
//   - for objects that is_maybe_half_constructed () reports, the end of the
//     containing TLAB is looked up and frag_start skips forward over zero
//     words below that end;
//   - after the loop nursery_last_pinned_end is set and the tail up to
//     nursery_real_end is added as a fragment;
//   - if no fragment was produced the pinned objects are logged.
// NOTE(review): the gap computation assumes pin_queue entries are in
// ascending address order -- confirm against optimize_pin_queue ().
2073 build_nursery_fragments (int start_pin, int end_pin)
2075 char *frag_start, *frag_end;
2079 /* FIXME: handle non-NULL fragment_freelist */
2080 fragment_freelist = nursery_fragments;
2081 nursery_fragments = NULL;
2082 frag_start = nursery_start;
2084 /* clear scan starts */
2085 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
2086 for (i = start_pin; i < end_pin; ++i) {
2087 frag_end = pin_queue [i];
2088 /* remove the pin bit from pinned objects */
2089 unpin_object (frag_end);
2090 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
2091 frag_size = frag_end - frag_start;
2093 add_nursery_frag (frag_size, frag_start, frag_end);
2094 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2095 frag_size += ALLOC_ALIGN - 1;
2096 frag_size &= ~(ALLOC_ALIGN - 1);
2097 frag_start = (char*)pin_queue [i] + frag_size;
2099 * pin_queue [i] might point to a half-constructed string or vector whose
2100 * length field is not set. In that case, frag_start points inside the
2101 * (zero initialized) object. Find the end of the object by scanning forward.
2104 if (is_maybe_half_constructed (pin_queue [i])) {
2107 /* This is also hit for zero length arrays/strings */
2109 /* Find the end of the TLAB which contained this allocation */
2110 tlab_end = find_tlab_next_from_address (pin_queue [i]);
2113 while ((frag_start < tlab_end) && *(mword*)frag_start == 0)
2114 frag_start += sizeof (mword);
2117 * FIXME: The object is either not allocated in a TLAB, or it isn't a
2118 * half constructed object.
2123 nursery_last_pinned_end = frag_start;
2124 frag_end = nursery_real_end;
2125 frag_size = frag_end - frag_start;
2127 add_nursery_frag (frag_size, frag_start, frag_end);
2128 if (!nursery_fragments) {
2129 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
2130 for (i = start_pin; i < end_pin; ++i) {
2131 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2136 /* Clear TLABs for all threads */
2140 /* FIXME: later reduce code duplication here with the above
2141 * We don't keep track of section fragments for non-nursery sections yet, so
// build_section_fragments: clean up an old-generation section that still
// holds pinned objects.
// Uses the section's own pin range, pin_queue [pin_queue_start, pin_queue_end).
//   - scan_starts is cleared and next_data reset to the section start;
//   - each pinned object is unpinned; an address at or beyond the section end
//     is clamped to data + size; otherwise it is recorded in scan_starts;
//   - the gap before each pinned object is zeroed with memset (no fragment
//     list is kept for these sections);
//   - frag_start moves past the object (size rounded up to ALLOC_ALIGN) and
//     next_data tracks the highest such end;
//   - the tail up to section->end_data is zeroed.
// NOTE(review): relies on ascending pin_queue order within the section, as
// build_nursery_fragments () does -- confirm.
2145 build_section_fragments (GCMemSection *section)
2148 char *frag_start, *frag_end;
2151 /* clear scan starts */
2152 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
2153 frag_start = section->data;
2154 section->next_data = section->data;
2155 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
2156 frag_end = pin_queue [i];
2157 /* remove the pin bit from pinned objects */
2158 unpin_object (frag_end);
2159 if (frag_end >= section->data + section->size) {
2160 frag_end = section->data + section->size;
2162 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
2164 frag_size = frag_end - frag_start;
2166 memset (frag_start, 0, frag_size);
2167 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2168 frag_size += ALLOC_ALIGN - 1;
2169 frag_size &= ~(ALLOC_ALIGN - 1);
2170 frag_start = (char*)pin_queue [i] + frag_size;
2171 section->next_data = MAX (section->next_data, frag_start);
2173 frag_end = section->end_data;
2174 frag_size = frag_end - frag_start;
2176 memset (frag_start, 0, frag_size);
// scan_from_registered_roots: precise scan of the registered roots.
// Walks every bucket of roots_hash and passes a record's
// [start_root, end_root) span and its root_desc to
// precisely_scan_objects_from () with the range [addr_start, addr_end).
// This is the precise counterpart of pin_from_roots (): the root_desc test is
// inverted here (presumably records without a descriptor are skipped -- confirm).
2180 scan_from_registered_roots (char *addr_start, char *addr_end)
2184 for (i = 0; i < roots_hash_size; ++i) {
2185 for (root = roots_hash [i]; root; root = root->next) {
2186 /* if desc is non-null it has precise info */
2187 if (!root->root_desc)
2189 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2190 precisely_scan_objects_from ((void**)root->start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc);
2196 * Collect objects in the nursery.
// collect_nursery: run a minor (nursery) collection.
// requested_size is not referenced by any statement shown here.
// Sequence:
//   1. nursery_next is saved, then raised to nursery_last_pinned_end and
//      finally to nursery_real_end, so the whole nursery is treated as used.
//   2. Optional check_consistency () when
//      consistency_check_at_minor_collection is set.
//   3. Under CLEAR_AT_TLAB_CREATION the unused tail
//      (orig_nursery_next..nursery_frag_real_end) and all remaining fragments
//      are zeroed, because pinning depends on free memory being zero.
//   4. to_space resumes at to_space_section->next_data; if less than
//      max_garbage_amount bytes remain, a new section of four times the
//      nursery size is allocated and becomes the to-space.
//   5. Pinning: pin_from_roots (), optimize_pin_queue (0),
//      pin_objects_from_addresses (); next_pin_slot ends up as the number of
//      pinned objects.
//   6. Roots: scan_from_remsets (), the pinned objects themselves, then
//      scan_from_registered_roots ().
//   7. drain_gray_stack () copies everything reachable.
//   8. build_nursery_fragments () unpins and rebuilds the free fragments.
//   9. Statistics are updated, last_num_pinned is recorded, and the finalizer
//      thread is notified when fin_ready_list is non-empty.
// The code states that the world must already be stopped.
2199 collect_nursery (size_t requested_size)
2201 GCMemSection *section;
2202 size_t max_garbage_amount;
2204 char *orig_nursery_next;
2206 TV_DECLARE (all_atv);
2207 TV_DECLARE (all_btv);
2212 orig_nursery_next = nursery_next;
2213 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2214 /* FIXME: optimize later to use the higher address where an object can be present */
2215 nursery_next = MAX (nursery_next, nursery_real_end);
2217 if (consistency_check_at_minor_collection)
2218 check_consistency ();
2220 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
2221 max_garbage_amount = nursery_next - nursery_start;
2223 /* Clear all remaining nursery fragments, pinning depends on this */
2224 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2225 g_assert (orig_nursery_next <= nursery_frag_real_end);
2226 memset (orig_nursery_next, 0, nursery_frag_real_end - orig_nursery_next);
2227 for (frag = nursery_fragments; frag; frag = frag->next) {
2228 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2233 * not enough room in the old generation to store all the possible data from
2234 * the nursery in a single continuous space.
2235 * We reset to_space if we allocated objects in degraded mode.
2237 if (to_space_section)
2238 to_space = gray_objects = to_space_section->next_data;
2239 if ((to_space_end - to_space) < max_garbage_amount) {
2240 section = alloc_section (nursery_section->size * 4);
2241 g_assert (nursery_section->size >= max_garbage_amount);
2242 to_space = gray_objects = section->next_data;
2243 to_space_end = section->end_data;
2244 to_space_section = section;
2246 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2247 nursery_section->next_data = nursery_next;
2250 mono_stats.minor_gc_count ++;
2251 /* world must be stopped already */
2252 TV_GETTIME (all_atv);
2254 /* pin from pinned handles */
2255 pin_from_roots (nursery_start, nursery_next);
2256 /* identify pinned objects */
2257 optimize_pin_queue (0);
2258 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2260 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2261 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2264 * walk all the roots and copy the young objects to the old generation,
2265 * starting from to_space
2268 scan_from_remsets (nursery_start, nursery_next);
2269 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2271 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2272 /* FIXME: later scan also alloc_pinned objects */
2274 /* the pinned objects are roots */
2275 for (i = 0; i < next_pin_slot; ++i) {
2276 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2277 scan_object (pin_queue [i], nursery_start, nursery_next);
2279 /* registered roots, this includes static fields */
2280 scan_from_registered_roots (nursery_start, nursery_next);
2282 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
2284 drain_gray_stack (nursery_start, nursery_next);
2286 /* walk the pin_queue, build up the fragment list of free memory, unmark
2287 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2290 build_nursery_fragments (0, next_pin_slot);
2292 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %zd bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2294 TV_GETTIME (all_btv);
2295 mono_stats.minor_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2297 /* prepare the pin queue for the next collection */
2298 last_num_pinned = next_pin_slot;
2300 if (fin_ready_list) {
2301 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2302 mono_gc_finalize_notify ();
// major_collection: collect the whole heap by copying every movable live
// object into one freshly allocated section.
// Sequence:
//   1. Statistics; under CLEAR_AT_TLAB_CREATION the unused nursery tail and
//      remaining fragments are zeroed (pinning depends on this).
//   2. If the MONO_GC_NO_MAJOR environment variable is set, collect_nursery (0)
//      is run (presumably instead of the major collection -- confirm).
//   3. The nursery is treated as full (next_data = nursery_real_end).
//   4. Pinning, per section: pin_from_roots () over the section's used range,
//      then optimize_pin_queue () and pin_objects_from_addresses () on the new
//      entries; the section remembers its pin_queue range, and the used bytes
//      are summed into copy_space_required.
//   5. Pinning, large objects: if any root address falls inside the object,
//      the queue is rolled back and the object itself is marked via pin_object ().
//   6. Pinning, pinned chunks: addresses are resolved with
//      mark_pinned_from_addresses () and the queue is rolled back.
//   7. A to-space section of copy_space_required bytes is allocated.
//   8. Roots are scanned with heap_start = NULL and heap_end = (char*)-1, so
//      the range covers every address: pinned objects, registered roots,
//      fin_ready_list, needed big objects, then drain_gray_stack ().
//   9. Sweep: unpinned large objects are unlinked and freed; pinned-chunk
//      objects are swept; sections without pinned objects are unlinked and
//      freed, the others are cleaned with build_section_fragments (); the
//      to-space and the nursery section are left in place.
//  10. Nursery fragments are rebuilt, statistics are updated, and the
//      finalizer thread is notified when fin_ready_list is non-empty.
2307 major_collection (void)
2309 GCMemSection *section, *prev_section;
2310 LOSObject *bigobj, *prevbo;
2316 TV_DECLARE (all_atv);
2317 TV_DECLARE (all_btv);
2320 /* FIXME: only use these values for the precise scan
2321 * note that to_space pointers should be excluded anyway...
2323 char *heap_start = NULL;
2324 char *heap_end = (char*)-1;
2325 size_t copy_space_required = 0;
2328 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2330 mono_stats.major_gc_count ++;
2332 /* Clear all remaining nursery fragments, pinning depends on this */
2333 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION) {
2334 g_assert (nursery_next <= nursery_frag_real_end);
2335 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
2336 for (frag = nursery_fragments; frag; frag = frag->next) {
2337 memset (frag->fragment_start, 0, frag->fragment_end - frag->fragment_start);
2342 * FIXME: implement Mark/Compact
2343 * Until that is done, we can just apply mostly the same alg as for the nursery:
2344 * this means we need a big section to potentially copy all the other sections, so
2345 * it is not ideal specially with large heaps.
2347 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2348 collect_nursery (0);
2351 TV_GETTIME (all_atv);
2352 /* FIXME: make sure the nursery next_data ptr is updated */
2353 nursery_section->next_data = nursery_real_end;
2354 /* we should also coalesce scanning from sections close to each other
2355 * and deal with pointers outside of the sections later.
2357 /* The remsets are not useful for a major collection */
2359 /* world must be stopped already */
2361 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2362 for (section = section_list; section; section = section->next) {
2363 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2364 pin_from_roots (section->data, section->next_data);
2365 if (count != next_pin_slot) {
2367 optimize_pin_queue (count);
2368 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2369 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2370 section->pin_queue_end = next_pin_slot = count + reduced_to;
2372 copy_space_required += (char*)section->next_data - (char*)section->data;
2374 /* identify possible pointers to the inside of large objects */
2375 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2376 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2377 count = next_pin_slot;
2378 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2379 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2380 if (next_pin_slot != count) {
2381 next_pin_slot = count;
2382 pin_object (bigobj->data);
2383 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %zd from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2386 /* look for pinned addresses for pinned-alloc objects */
2387 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2388 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2389 count = next_pin_slot;
2390 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2391 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2392 if (next_pin_slot != count) {
2393 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2394 next_pin_slot = count;
2399 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2400 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2402 /* allocate the big to space */
2403 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %zd\n", copy_space_required));
2404 section = alloc_section (copy_space_required);
2405 to_space = gray_objects = section->next_data;
2406 to_space_end = section->end_data;
2407 to_space_section = section;
2409 /* the old generation doesn't need to be scanned (no remembered sets or card
2410 * table needed either): the only objects that must survive are those pinned and
2411 * those referenced by the precise roots.
2412 * mark any section without pinned objects, so we can free it since we will be able to
2413 * move all the objects.
2415 /* the pinned objects are roots (big objects are included in this list, too) */
2416 for (i = 0; i < next_pin_slot; ++i) {
2417 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2418 scan_object (pin_queue [i], heap_start, heap_end);
2420 /* registered roots, this includes static fields */
2421 scan_from_registered_roots (heap_start, heap_end);
2423 /* scan the list of objects ready for finalization */
2424 for (fin = fin_ready_list; fin; fin = fin->next) {
2425 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2426 fin->object = copy_object (fin->object, heap_start, heap_end);
2429 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2431 /* we need to go over the big object list to see if any was marked and scan it
2432 * And we need to make this in a loop, considering that objects referenced by finalizable
2433 * objects could reference big objects (this happens in drain_gray_stack ())
2435 scan_needed_big_objects (heap_start, heap_end);
2436 /* all the objects in the heap */
2437 drain_gray_stack (heap_start, heap_end);
2439 /* sweep the big objects list */
2441 for (bigobj = los_object_list; bigobj;) {
2442 if (object_is_pinned (bigobj->data)) {
2443 unpin_object (bigobj->data);
2444 bigobj->scanned = FALSE;
2447 /* not referenced anywhere, so we can free it */
2449 prevbo->next = bigobj->next;
2451 los_object_list = bigobj->next;
2453 bigobj = bigobj->next;
2454 free_large_object (to_free);
2458 bigobj = bigobj->next;
2460 /* unpin objects from the pinned chunks and free the unmarked ones */
2461 sweep_pinned_objects ();
2463 /* free the unused sections */
2464 prev_section = NULL;
2465 for (section = section_list; section;) {
2466 /* to_space doesn't need handling here and the nursery is special */
2467 if (section == to_space_section || section == nursery_section) {
2468 prev_section = section;
2469 section = section->next;
2472 /* no pinning object, so the section is free */
2473 if (section->pin_queue_start == section->pin_queue_end) {
2474 GCMemSection *to_free;
2476 prev_section->next = section->next;
2478 section_list = section->next;
2480 section = section->next;
2481 free_mem_section (to_free);
2484 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2485 build_section_fragments (section);
2487 prev_section = section;
2488 section = section->next;
2491 /* walk the pin_queue, build up the fragment list of free memory, unmark
2492 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2495 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2497 TV_GETTIME (all_btv);
2498 mono_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
2499 /* prepare the pin queue for the next collection */
2501 if (fin_ready_list) {
2502 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2503 mono_gc_finalize_notify ();
2508 * Allocate a new section of memory to be used as old generation.
// alloc_section: create an old-generation section able to hold at least
// size bytes and push it on section_list.
// Sizing: the default is next_section_size; a larger request is rounded up to
// a multiple of pagesize (the mask arithmetic assumes pagesize is a power of
// two). section_size_used counts allocations; once it exceeds 3 it is reset
// and next_section_size doubles, capped at max_section_size.
// The section descriptor comes from get_internal_mem (), the data area from
// get_os_memory (); heap boundaries and total_alloc are updated, one
// scan-start slot is reserved per SCAN_START_SIZE bytes, and the role is
// MEMORY_ROLE_GEN1.
// NOTE(review): no allocation-failure handling is visible here -- confirm.
2510 static GCMemSection*
2511 alloc_section (size_t size)
2513 GCMemSection *section;
2516 size_t new_size = next_section_size;
2518 if (size > next_section_size) {
2520 new_size += pagesize - 1;
2521 new_size &= ~(pagesize - 1);
2523 section_size_used++;
2524 if (section_size_used > 3) {
2525 section_size_used = 0;
2526 next_section_size *= 2;
2527 if (next_section_size > max_section_size)
2528 next_section_size = max_section_size;
2530 section = get_internal_mem (sizeof (GCMemSection));
2531 data = get_os_memory (new_size, TRUE);
2532 section->data = section->next_data = data;
2533 section->size = new_size;
2534 section->end_data = data + new_size;
2535 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2536 total_alloc += new_size;
2537 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %zd, total: %zd\n", new_size, total_alloc));
// NOTE(review): the next two assignments repeat values already stored a few
// lines above (data and size); they look redundant.
2538 section->data = data;
2539 section->size = new_size;
2540 scan_starts = new_size / SCAN_START_SIZE;
2541 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2542 section->num_scan_start = scan_starts;
2543 section->role = MEMORY_ROLE_GEN1;
2545 /* add to the section list */
2546 section->next = section_list;
2547 section_list = section;
// free_mem_section: release a section that is no longer needed.
// data and size are copied out of the descriptor first, because the
// descriptor itself is freed before total_alloc is adjusted.
// The data area goes back through free_os_memory (), the descriptor through
// free_internal_mem ().
// The section is not unlinked from section_list here; major_collection ()
// does that before calling this function.
// NOTE(review): the section's scan_starts array is not released in the
// statements shown -- confirm whether that is intentional.
2553 free_mem_section (GCMemSection *section)
2555 char *data = section->data;
2556 size_t size = section->size;
2557 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %zd\n", data, size));
2558 free_os_memory (data, size);
2559 free_internal_mem (section);
2560 total_alloc -= size;
2564 * When deciding if it's better to collect or to expand, keep track
2565 * of how much garbage was reclaimed with the last collection: if it's too
2567 * This is called when we could not allocate a small object.
// minor_collect_or_expand_inner: slow path taken when a small allocation of
// size bytes could not be satisfied.
//   - nursery_section is tested first (presumably the nursery is created on
//     first use -- confirm);
//   - do_minor_collection is initialised to 1, so in the statements shown a
//     nursery collection always runs;
//   - after collect_nursery (), search_fragment_for_size () looks for a
//     fragment that fits and sets up the allocation pointers; on failure the
//     pinned objects from the last collection are logged.
// Marked noinline via the attribute on the definition.
2569 static void __attribute__((noinline))
2570 minor_collect_or_expand_inner (size_t size)
2572 int do_minor_collection = 1;
2574 if (!nursery_section) {
2578 if (do_minor_collection) {
2580 collect_nursery (size);
2581 DEBUG (2, fprintf (gc_debug_file, "Heap size: %zd, LOS size: %zd\n", total_alloc, los_memory_usage));
2583 /* this also sets the proper pointers for the next allocation */
2584 if (!search_fragment_for_size (size)) {
2586 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2587 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %zd alloc (%d pinned)\n", size, last_num_pinned));
2588 for (i = 0; i < last_num_pinned; ++i) {
2589 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2592 /* This is needed by collect_nursery () to calculate nursery_last_allocated */
2593 nursery_next = nursery_frag_real_end = NULL;
2596 //report_internal_mem_usage ();
2600 * ######################################################################
2601 * ######## Memory allocation from the OS
2602 * ######################################################################
2603 * This section of code deals with getting memory from the OS and
2604 * allocating memory for GC-internal data structures.
2605 * Internal memory can be handled with a freelist for small objects.
2609 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2610 * This must not require any lock.
// get_os_memory: obtain a block of memory from the operating system.
// size: requested byte count; rounded up to a multiple of pagesize (the mask
//   arithmetic assumes pagesize is a power of two).
// activate: non-zero requests read/write access, zero requests
//   MONO_MMAP_NONE; the mapping is always private and anonymous.
// The memory is obtained with mono_valloc () at an address chosen by the
// callee (first argument 0).
// NOTE(review): the handling of a failed mono_valloc () is not visible here
// -- confirm what callers receive on failure.
2613 get_os_memory (size_t size, int activate)
2616 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2618 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
2619 size += pagesize - 1;
2620 size &= ~(pagesize - 1);
2621 ptr = mono_valloc (0, size, prot_flags);
2626 * Free the memory returned by get_os_memory (), returning it to the OS.
// free_os_memory: hand the range [addr, addr + size) back to the OS with
// munmap ().
// NOTE(review): get_os_memory () allocates through mono_valloc () but this
// releases with raw munmap (); confirm the pairing is valid on every
// supported platform. Also, size is not rounded up to a page multiple here,
// unlike in get_os_memory (), and the munmap () result is ignored.
2629 free_os_memory (void *addr, size_t size)
2631 munmap (addr, size);
// report_pinned_chunk: print usage statistics for one PinnedChunk to stdout.
// seq is only used as a label in the output.
// Pages whose page_sizes entry is zero are tested for in the first loop
// (presumably counted as free pages -- confirm); free memory starts as
// FREELIST_PAGESIZE per free page. For each non-empty free list the items are
// counted and freelist_sizes [i] * num_free is added to the total.
2638 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2640 int i, free_pages, num_free, free_mem;
2642 for (i = 0; i < chunk->num_pages; ++i) {
2643 if (!chunk->page_sizes [i])
2646 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2647 free_mem = FREELIST_PAGESIZE * free_pages;
2648 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2649 if (!chunk->free_list [i])
2652 p = chunk->free_list [i];
2657 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2658 free_mem += freelist_sizes [i] * num_free;
2660 printf ("\tfree memory in chunk: %d\n", free_mem);
// report_internal_mem_usage: debugging aid that prints, to stdout, a report
// for every chunk on internal_chunk_list and then for every chunk on
// pinned_chunk_list, using report_pinned_chunk () with a running sequence
// number. The only call visible in this part of the file is commented out.
2667 report_internal_mem_usage (void) {
2670 printf ("Internal memory usage:\n");
2672 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2673 report_pinned_chunk (chunk, i++);
2675 printf ("Pinned memory usage:\n");
2677 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2678 report_pinned_chunk (chunk, i++);
2683 * the array of pointers from @start to @end contains conservative
2684 * pointers to objects inside @chunk: mark each referenced object
// mark_pinned_from_addresses: resolve conservative addresses to objects
// inside a pinned chunk.
// For each address in [start, end):
//   - offset is the byte distance from the chunk header and page the index
//     of the FREELIST_PAGESIZE page it falls in;
//   - on page 0 the object offset is measured from chunk->start_data (the
//     page begins with the chunk's bookkeeping data), on other pages from
//     the page start;
//   - slot_size comes from page_sizes [page]; the offset is rounded down to
//     a multiple of slot_size to find the start of the slot;
//   - a slot whose first word is non-NULL and points outside the chunk's data
//     range is treated as a live object (a pointer inside the chunk means the
//     slot is on a free list).
// NOTE(review): the statements that actually mark the object are not shown;
// presumably pin_object () is used -- confirm.
// NOTE(review): the range test here uses start_data as lower bound and a
// strict greater-than on the upper bound, whereas sweep_pinned_objects ()
// uses the chunk address and greater-or-equal -- confirm the difference is
// intended.
2688 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2690 for (; start < end; start++) {
2691 char *addr = *start;
2692 int offset = (char*)addr - (char*)chunk;
2693 int page = offset / FREELIST_PAGESIZE;
2694 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2695 int slot_size = chunk->page_sizes [page];
2697 /* the page is not allocated */
2700 /* would be faster if we restrict the sizes to power of two,
2701 * but that's a waste of memory: need to measure. it could reduce
2702 * fragmentation since there are less pages needed, if for example
2703 * someone interns strings of each size we end up with one page per
2704 * interned string (still this is just ~40 KB): with more fine-grained sizes
2705 * this increases the number of used pages.
2708 obj_offset /= slot_size;
2709 obj_offset *= slot_size;
2710 addr = (char*)chunk->start_data + obj_offset;
2712 obj_offset /= slot_size;
2713 obj_offset *= slot_size;
2714 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2717 /* if the vtable is inside the chunk it's on the freelist, so skip */
2718 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2720 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
// sweep_pinned_objects: walk every slot of every chunk on pinned_chunk_list
// after marking.
// For each page the slot size is read from page_sizes [i]; page 0 starts at
// chunk->start_data and ends at chunk + FREELIST_PAGESIZE, other pages span
// a full FREELIST_PAGESIZE. Slots are visited while a whole slot still fits
// before the page end.
// A slot whose first word is non-NULL and points outside
// [chunk, end_chunk) holds an object; a pointer inside the chunk means the
// slot is on a free list. Marked objects are logged as "Unmarked" (presumably
// the pin bit is cleared -- confirm); unmarked ones are only logged for now,
// see the FIXME about adding them to the freelist.
2726 sweep_pinned_objects (void)
2733 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2734 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2735 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (ranhe: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2736 for (i = 0; i < chunk->num_pages; ++i) {
2737 obj_size = chunk->page_sizes [i];
2740 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2741 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2742 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2743 while (p + obj_size <= endp) {
2745 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2746 /* if the first word (the vtable) is outside the chunk we have an object */
2747 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2748 if (object_is_pinned (ptr)) {
2750 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2752 /* FIXME: add to freelist */
2753 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2763 * Find the slot number in the freelist for memory chunks that
2764 * can contain @size objects.
// slot_for_size: linear search of freelist_sizes for the first slot whose
// size is at least the requested size (presumably that slot index is
// returned -- confirm). A request larger than every entry reaches
// g_assert_not_reached (), so callers must not pass sizes beyond the largest
// freelist size.
2767 slot_for_size (size_t size)
2770 /* do a binary search or lookup table later. */
2771 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2772 if (freelist_sizes [slot] >= size)
2775 g_assert_not_reached ();
2780 * Build a free list for @size memory chunks from the memory area between
2781 * start_page and end_page.
/*
 * Thread a singly linked free list through the memory between start_page
 * and end_page: the first word of each @size-byte item is set to the address
 * of the following item. The list head is stored in chunk->free_list [slot],
 * which must be empty on entry (asserted).
 * NOTE(review): the loop advance and the termination of the last item are on
 * lines not visible in this listing.
 */
2784 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
2788 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
2789 p = (void**)start_page;
/* end is the last address at which a whole item still fits before end_page */
2790 end = (void**)(end_page - size);
2791 g_assert (!chunk->free_list [slot]);
2792 chunk->free_list [slot] = p;
2793 while ((char*)p + size <= (char*)end) {
/* link this item to the one that starts size bytes further on */
2795 *p = (void*)((char*)p + size);
2799 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
/*
 * Obtain a new PinnedChunk from the OS. The requested @size is grown by one
 * page, rounded up to a multiple of pagesize and raised to at least
 * PINNED_CHUNK_MIN_SIZE * 2. The chunk header is followed by the page_sizes
 * array (one int per page), the free_list array (FREELIST_NUM_SLOTS pointers)
 * and then start_data, each aligned to ALLOC_ALIGN. The rest of the first
 * page is immediately turned into a freelist of PINNED_FIRST_SLOT_SIZE items.
 * NOTE(review): locals and the return statement are not visible in this
 * listing; the result of get_os_memory () is used without a NULL check in
 * the visible lines.
 */
2803 alloc_pinned_chunk (size_t size)
2808 size += pagesize; /* at least one page */
/* round up to a whole number of pages (assumes pagesize is a power of two - TODO confirm) */
2809 size += pagesize - 1;
2810 size &= ~(pagesize - 1);
2811 if (size < PINNED_CHUNK_MIN_SIZE * 2)
2812 size = PINNED_CHUNK_MIN_SIZE * 2;
2813 chunk = get_os_memory (size, TRUE);
2814 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
2815 total_alloc += size;
2817 /* setup the bookkeeping fields */
2818 chunk->num_pages = size / FREELIST_PAGESIZE;
2819 offset = G_STRUCT_OFFSET (PinnedChunk, data);
2820 chunk->page_sizes = (void*)((char*)chunk + offset);
2821 offset += sizeof (int) * chunk->num_pages;
/* align the start of the free_list array */
2822 offset += ALLOC_ALIGN - 1;
2823 offset &= ~(ALLOC_ALIGN - 1);
2824 chunk->free_list = (void*)((char*)chunk + offset);
2825 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
/* align the start of the usable data area */
2826 offset += ALLOC_ALIGN - 1;
2827 offset &= ~(ALLOC_ALIGN - 1);
2828 chunk->start_data = (void*)((char*)chunk + offset);
2830 /* allocate the first page to the freelist */
2831 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
2832 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
2833 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %zd\n", chunk, size));
/* widen the global address range covered by pinned chunks */
2834 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
2835 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
2839 /* assumes freelist for slot is empty, so try to alloc a new page */
/*
 * Take one item for @slot from @chunk. The head of chunk->free_list [slot]
 * is popped when available; otherwise the pages are scanned, a page whose
 * page_sizes entry is still zero is assigned freelist_sizes [slot], a
 * freelist is built over that whole page and its head is popped.
 * NOTE(review): the conditions guarding the pops, the return statements and
 * the failure result are on lines not visible in this listing.
 */
2841 get_chunk_freelist (PinnedChunk *chunk, int slot)
2845 p = chunk->free_list [slot];
2847 chunk->free_list [slot] = *p;
2850 for (i = 0; i < chunk->num_pages; ++i) {
/* a non-zero entry means the page already serves some slot size */
2852 if (chunk->page_sizes [i])
2854 size = freelist_sizes [slot];
2855 chunk->page_sizes [i] = size;
2856 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
2860 p = chunk->free_list [slot];
2862 chunk->free_list [slot] = *p;
/*
 * Allocate @size bytes from the pinned chunks. Three attempts are made in
 * order: pop from an existing freelist of the matching slot in any chunk of
 * pinned_chunk_list, ask each chunk for a fresh page via
 * get_chunk_freelist (), and finally allocate a new chunk, push it on
 * pinned_chunk_list and take the item from it.
 * NOTE(review): the checks between the attempts and the return statements
 * are on lines not visible in this listing.
 */
2869 alloc_from_freelist (size_t size)
2873 PinnedChunk *pchunk;
2874 slot = slot_for_size (size);
2875 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
2876 g_assert (size <= freelist_sizes [slot]);
/* first pass: reuse an item already on some chunk's freelist */
2877 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2878 void **p = pchunk->free_list [slot];
2880 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
2881 pchunk->free_list [slot] = *p;
/* second pass: let a chunk dedicate a page to this slot */
2885 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2886 res = get_chunk_freelist (pchunk, slot);
/* last resort: a brand new chunk, linked at the head of the list */
2890 pchunk = alloc_pinned_chunk (size);
2891 /* FIXME: handle OOM */
2892 pchunk->next = pinned_chunk_list;
2893 pinned_chunk_list = pchunk;
2894 res = get_chunk_freelist (pchunk, slot);
/*
 * get_internal_mem: allocate @size bytes for GC-internal bookkeeping.
 * The visible code has two bodies: a plain calloc (1, size), and a freelist
 * allocator that mirrors alloc_from_freelist () but works on
 * internal_chunk_list instead of pinned_chunk_list.
 * NOTE(review): presumably a preprocessor conditional selects between the
 * two bodies; it is not visible in this listing. Whether the freelist path
 * zeroes the returned memory cannot be told from the visible lines.
 */
2898 /* used for the GC-internal data structures */
2899 /* FIXME: add support for bigger sizes by allocating more than one page
2903 get_internal_mem (size_t size)
2905 return calloc (1, size);
2909 PinnedChunk *pchunk;
2910 slot = slot_for_size (size);
2911 g_assert (size <= freelist_sizes [slot]);
2912 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2913 void **p = pchunk->free_list [slot];
2915 pchunk->free_list [slot] = *p;
2919 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2920 res = get_chunk_freelist (pchunk, slot);
2924 pchunk = alloc_pinned_chunk (size);
2925 /* FIXME: handle OOM */
/* the new chunk becomes the head of the internal chunk list */
2926 pchunk->next = internal_chunk_list;
2927 internal_chunk_list = pchunk;
2928 res = get_chunk_freelist (pchunk, slot);
/*
 * Return @addr to the internal allocator. The chunk of internal_chunk_list
 * whose address range contains @addr is located, the page index is derived
 * from the byte offset inside the chunk, the slot is recovered from that
 * page's recorded item size and the item is pushed on the slot's freelist.
 * If no chunk contains @addr a message is printed and
 * g_assert_not_reached () fires.
 * NOTE(review): the definition of p and the return after a successful free
 * are on lines not visible in this listing.
 */
2934 free_internal_mem (void *addr)
2938 PinnedChunk *pchunk;
2939 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2940 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
2941 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
2942 int offset = (char*)addr - (char*)pchunk;
2943 int page = offset / FREELIST_PAGESIZE;
/* page_sizes [page] is the item size served by that page */
2944 int slot = slot_for_size (pchunk->page_sizes [page]);
/* push on the head of the freelist */
2946 *p = pchunk->free_list [slot];
2947 pchunk->free_list [slot] = p;
2951 printf ("free of %p failed\n", addr);
2952 g_assert_not_reached ();
2957 * ######################################################################
2958 * ######## Object allocation
2959 * ######################################################################
2960 * This section of code deals with allocating memory for objects.
2961 * There are several ways:
2962 * *) allocate large objects
2963 * *) allocate normal objects
2964 * *) fast lock-free allocation
2965 * *) allocation of pinned objects
/*
 * Release the OS memory backing a large object. los_memory_usage is reduced
 * by the object size; total_alloc is reduced by the page-rounded size of the
 * object plus its LOSObject header, which is also the length passed to
 * free_os_memory ().
 */
2969 free_large_object (LOSObject *obj)
2971 size_t size = obj->size;
2972 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %zd\n", obj->data, obj->size));
2974 los_memory_usage -= size;
/* recompute the page-rounded size, same arithmetic as in alloc_large_inner () */
2975 size += sizeof (LOSObject);
2976 size += pagesize - 1;
2977 size &= ~(pagesize - 1);
2978 total_alloc -= size;
2980 free_os_memory (obj, size);
2984 * Objects with size >= 64KB are allocated in the large object space.
2985 * They are currently kept track of with a linked list.
2986 * They don't move, so there is no need to pin them during collection
2987 * and we avoid the memcpy overhead.
/*
 * Allocate a large object of @size bytes with its own OS mapping and link it
 * at the head of los_object_list. When los_memory_usage exceeds
 * next_los_collection a major collection is run first and the threshold is
 * moved to the current usage plus 5 MB. The mapping size is @size plus the
 * LOSObject header, rounded up to a multiple of pagesize.
 * NOTE(review): the initialisation of alloc_size, the stores into the new
 * object (size, vtable) and the return statement are on lines not visible
 * in this listing.
 */
2989 static void* __attribute__((noinline))
2990 alloc_large_inner (MonoVTable *vtable, size_t size)
2995 int just_did_major_gc = FALSE;
2997 if (los_memory_usage > next_los_collection) {
2998 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %zd (los already: %zu, limit: %zu)\n", size, los_memory_usage, next_los_collection));
2999 just_did_major_gc = TRUE;
3001 major_collection ();
3003 /* later increase based on a percent of the heap size */
3004 next_los_collection = los_memory_usage + 5*1024*1024;
/* header plus payload, rounded up to whole pages */
3007 alloc_size += sizeof (LOSObject);
3008 alloc_size += pagesize - 1;
3009 alloc_size &= ~(pagesize - 1);
3010 /* FIXME: handle OOM */
3011 obj = get_os_memory (alloc_size, TRUE);
3013 vtslot = (void**)obj->data;
3015 total_alloc += alloc_size;
3016 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
3017 obj->next = los_object_list;
3018 los_object_list = obj;
/* usage is tracked with the requested size, not the page-rounded mapping size */
3019 los_memory_usage += size;
3021 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %zd\n", obj->data, vtable, vtable->klass->name, size));
/*
 * search_fragment_for_size: make a nursery fragment of at least @size bytes
 * the current allocation area. The first fitting entry of nursery_fragments
 * is unlinked, nursery_next and nursery_frag_real_end are set to its bounds
 * and the Fragment record is pushed on fragment_freelist. With the
 * CLEAR_AT_TLAB_CREATION policy the unused tail of the previous area is
 * zeroed before searching.
 * NOTE(review): the maintenance of prev and the return statements are on
 * lines not visible in this listing.
 */
3025 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
3026 * an object of size @size
3027 * Return FALSE if not found (which means we need a collection)
3030 search_fragment_for_size (size_t size)
3032 Fragment *frag, *prev;
3033 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %zd\n", nursery_frag_real_end, size));
3035 if (nursery_frag_real_end > nursery_next && nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3036 /* Clear the remaining space, pinning depends on this */
3037 memset (nursery_next, 0, nursery_frag_real_end - nursery_next);
/* first-fit search over the fragment list */
3040 for (frag = nursery_fragments; frag; frag = frag->next) {
3041 if (size <= (frag->fragment_end - frag->fragment_start)) {
3042 /* remove from the list */
3044 prev->next = frag->next;
3046 nursery_fragments = frag->next;
3047 nursery_next = frag->fragment_start;
3048 nursery_frag_real_end = frag->fragment_end;
3050 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %zd (req: %zd)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
/* recycle the Fragment record itself */
3051 frag->next = fragment_freelist;
3052 fragment_freelist = frag;
3061 * size is already rounded up and we hold the GC lock.
/*
 * Allocate @size bytes outside the nursery. The first non-nursery section
 * with at least @size bytes left between next_data and end_data is used;
 * the visible code also allocates a new section four times the nursery
 * section size (presumably only when the search fails - TODO confirm).
 * The section's next_data is bumped and degraded_mode accumulates the number
 * of bytes handed out this way.
 * NOTE(review): the exit from the search loop, the vtable store and the
 * return statement are on lines not visible in this listing.
 */
3064 alloc_degraded (MonoVTable *vtable, size_t size)
3066 GCMemSection *section;
3068 for (section = section_list; section; section = section->next) {
3069 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
3070 p = (void**)section->next_data;
3075 section = alloc_section (nursery_section->size * 4);
3076 /* FIXME: handle OOM */
3077 p = (void**)section->next_data;
3079 section->next_data += size;
3080 degraded_mode += size;
3081 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %zd in section %p\n", p, vtable, vtable->klass->name, size, section));
3087 * Provide a variant that takes just the vtable for small fixed-size objects.
3088 * The aligned size is already computed and stored in vt->gc_descr.
3089 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
3090 * processing. We can keep track of where objects start, for example,
3091 * so when we scan the thread stacks for pinned objects, we can start
3092 * a search for the pinned object in SCAN_START_SIZE chunks.
/*
 * Main object allocation entry point. @size is rounded up to ALLOC_ALIGN.
 * The fast path bumps tlab_next and succeeds when the new value stays below
 * tlab_temp_end. Otherwise the visible code distinguishes:
 *  - size > MAX_SMALL_OBJ_SIZE: delegate to alloc_large_inner ();
 *  - tlab_next >= tlab_real_end (TLAB exhausted): either keep allocating in
 *    degraded mode, allocate directly from the nursery when size > tlab_size,
 *    or retire the TLAB and carve a new one of tlab_size bytes out of the
 *    current nursery fragment, collecting/expanding when no fragment fits;
 *  - otherwise only tlab_temp_end was reached: record a scan start and move
 *    tlab_temp_end forward.
 * NOTE(review): many lines (locals, locking, else branches, vtable stores,
 * returns) are not visible in this listing, so the exact nesting of the
 * branches above should be confirmed against the full file.
 */
3095 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
3097 /* FIXME: handle OOM */
/* round the request up to a multiple of ALLOC_ALIGN */
3102 size += ALLOC_ALIGN - 1;
3103 size &= ~(ALLOC_ALIGN - 1);
3105 g_assert (vtable->gc_descr);
/* debugging aid: force a nursery collection ahead of the allocation */
3107 if (G_UNLIKELY (collect_before_allocs)) {
3110 if (nursery_section) {
3113 update_current_thread_stack (&dummy);
3115 collect_nursery (0);
3117 if (!degraded_mode && !search_fragment_for_size (size)) {
3119 g_assert_not_reached ();
3125 /* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */
3127 p = (void**)tlab_next;
3128 /* FIXME: handle overflow */
3129 new_next = (char*)p + size;
3130 tlab_next = new_next;
/* fast path: the object fits below the temporary TLAB limit */
3132 if (G_LIKELY (new_next < tlab_temp_end)) {
3136 * FIXME: We might need a memory barrier here so the change to tlab_next is
3137 * visible before the vtable store.
3140 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3148 /* there are two cases: the object is too big or we run out of space in the TLAB */
3149 /* we also reach here when the thread does its first allocation after a minor
3150 * collection, since the tlab_ variables are initialized to NULL.
3151 * there can be another case (from ORP), if we cooperate with the runtime a bit:
3152 * objects that need finalizers can have the high bit set in their size
3153 * so the above check fails and we can readily add the object to the queue.
3154 * This avoids taking again the GC lock when registering, but this is moot when
3155 * doing thread-local allocation, so it may not be a good idea.
3158 if (size > MAX_SMALL_OBJ_SIZE) {
3159 /* get ready for possible collection */
3160 update_current_thread_stack (&dummy);
3162 p = alloc_large_inner (vtable, size);
3164 if (tlab_next >= tlab_real_end) {
3166 * Run out of space in the TLAB. When this happens, some amount of space
3167 * remains in the TLAB, but not enough to satisfy the current allocation
3168 * request. Currently, we retire the TLAB in all cases, later we could
3169 * keep it if the remaining space is above a treshold, and satisfy the
3170 * allocation directly from the nursery.
3173 /* when running in degraded mode, we continue allocing that way
3174 * for a while, to decrease the number of useless nursery collections.
3176 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
3177 p = alloc_degraded (vtable, size);
3182 if (size > tlab_size) {
3183 /* Allocate directly from the nursery */
3184 if (nursery_next + size >= nursery_frag_real_end) {
3185 if (!search_fragment_for_size (size)) {
3186 /* get ready for possible collection */
3187 update_current_thread_stack (&dummy);
3188 minor_collect_or_expand_inner (size);
3189 if (degraded_mode) {
3190 p = alloc_degraded (vtable, size);
/* bump-allocate straight from the current nursery fragment */
3197 p = (void*)nursery_next;
3198 nursery_next += size;
3199 if (nursery_next > nursery_frag_real_end) {
3204 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3205 memset (p, 0, size);
3207 DEBUG (3, fprintf (gc_debug_file, "Retire TLAB: %p-%p [%ld]\n", tlab_start, tlab_real_end, (long)(tlab_real_end - tlab_next - size)));
/* make sure the current fragment can hold a whole new TLAB */
3209 if (nursery_next + tlab_size >= nursery_frag_real_end) {
3210 res = search_fragment_for_size (tlab_size);
3212 /* get ready for possible collection */
3213 update_current_thread_stack (&dummy);
3214 minor_collect_or_expand_inner (tlab_size);
3215 if (degraded_mode) {
3216 p = alloc_degraded (vtable, size);
3223 /* Allocate a new TLAB from the current nursery fragment */
3224 tlab_start = nursery_next;
3225 nursery_next += tlab_size;
3226 tlab_next = tlab_start;
3227 tlab_real_end = tlab_start + tlab_size;
/* the temporary limit stops the fast path after at most SCAN_START_SIZE bytes */
3228 tlab_temp_end = tlab_start + MIN (SCAN_START_SIZE, tlab_size);
3230 if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION)
3231 memset (tlab_start, 0, tlab_size);
3233 /* Allocate from the TLAB */
3234 p = (void*)tlab_next;
3236 g_assert (tlab_next <= tlab_real_end);
/* scan_starts is indexed by the object's offset in the nursery divided by SCAN_START_SIZE */
3238 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3241 /* Reached tlab_temp_end */
3243 /* record the scan start so we can find pinned objects more easily */
3244 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = (char*)p;
3245 /* we just bump tlab_temp_end as well */
3246 tlab_temp_end = MIN (tlab_real_end, tlab_next + SCAN_START_SIZE);
3247 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", tlab_next, tlab_temp_end));
3251 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3260 * To be used for interned strings and possibly MonoThread, reflection handles.
3261 * We may want to explicitly free these objects.
/*
 * Allocate an object that will not be moved. @size is rounded up to
 * ALLOC_ALIGN. Requests larger than MAX_FREELIST_SIZE go to
 * alloc_large_inner (); the others come from the pinned-chunk freelists and
 * are explicitly zeroed here.
 * NOTE(review): locking, the vtable store and the return statement are on
 * lines not visible in this listing.
 */
3264 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
3266 /* FIXME: handle OOM */
3268 size += ALLOC_ALIGN - 1;
3269 size &= ~(ALLOC_ALIGN - 1);
3271 if (size > MAX_FREELIST_SIZE) {
/* record the stack position first, as the large allocation may trigger a collection */
3272 update_current_thread_stack (&p);
3273 /* large objects are always pinned anyway */
3274 p = alloc_large_inner (vtable, size);
3276 p = alloc_from_freelist (size);
/* freelist memory is recycled, so it has to be cleared */
3277 memset (p, 0, size);
3279 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %zd\n", p, vtable, vtable->klass->name, size));
3286 * ######################################################################
3287 * ######## Finalization support
3288 * ######################################################################
3292 * this is valid for the nursery: if the object has been forwarded it means it's
3293 * still referenced from a root. If it is pinned it's still alive as well.
3294 * Return TRUE if @obj is ready to be finalized.
/* True when obj is neither pinned nor forwarded. The argument is evaluated twice. */
3296 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
/*
 * Process the finalizable objects located in [start, end) but outside
 * [to_space, to_space_end). Entries whose object is ready for finalization
 * are unlinked from finalizable_hash, pushed on fin_ready_list and their
 * object is kept alive through copy_object (); the other entries only get
 * their object pointer refreshed through copy_object ().
 * NOTE(review): the maintenance of prev/next, the else branches and any
 * return value are on lines not visible in this listing.
 */
3299 finalize_in_range (char *start, char *end)
3301 FinalizeEntry *entry, *prev;
3305 for (i = 0; i < finalizable_hash_size; ++i) {
3307 for (entry = finalizable_hash [i]; entry;) {
3308 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3309 if (object_is_fin_ready (entry->object)) {
3311 FinalizeEntry *next;
3312 /* remove and put in fin_ready_list */
3314 prev->next = entry->next;
3316 finalizable_hash [i] = entry->next;
3318 num_ready_finalizers++;
3319 num_registered_finalizers--;
3320 entry->next = fin_ready_list;
3321 fin_ready_list = entry;
3322 /* Make it survive */
3323 from = entry->object;
3324 entry->object = copy_object (entry->object, start, end);
3325 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3329 /* update pointer */
3330 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3331 entry->object = copy_object (entry->object, start, end);
3335 entry = entry->next;
/*
 * Process the disappearing links whose target object lies in [start, end)
 * but outside [to_space, to_space_end). When the target is ready for
 * finalization the entry is unlinked from disappearing_link_hash, freed with
 * free_internal_mem () and the link count is decremented; otherwise the
 * entry's object pointer is refreshed through copy_object () and stored
 * back through the link.
 * NOTE(review): the store that nulls the link, the definition of link and
 * the maintenance of prev are on lines not visible in this listing.
 */
3341 null_link_in_range (char *start, char *end)
3343 FinalizeEntry *entry, *prev;
3345 for (i = 0; i < disappearing_link_hash_size; ++i) {
3347 for (entry = disappearing_link_hash [i]; entry;) {
3348 if ((char*)entry->object >= start && (char*)entry->object < end && ((char*)entry->object < to_space || (char*)entry->object >= to_space_end)) {
3349 if (object_is_fin_ready (entry->object)) {
/* entry->data holds the address of the link slot */
3350 void **p = entry->data;
3353 /* remove from list */
3355 prev->next = entry->next;
3357 disappearing_link_hash [i] = entry->next;
3358 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, entry->object));
3360 free_internal_mem (entry);
3362 num_disappearing_links--;
3366 /* update pointer if it's moved
3367 * FIXME: what if an object is moved earlier?
3369 entry->object = copy_object (entry->object, start, end);
3370 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->data, entry->object));
3372 *link = entry->object;
3376 entry = entry->next;
3382 * mono_gc_finalizers_for_domain:
3383 * @domain: the unloading appdomain
3384 * @out_array: output array
3385 * @out_size: size of output array
3387 * Store inside @out_array up to @out_size objects that belong to the unloading
3388 * appdomain @domain. Returns the number of stored items. Can be called repeatedly
3389 * until it returns 0.
3390 * The items are removed from the finalizer data structure, so the caller is supposed
3392 * @out_array should be on the stack to allow the GC to know the objects are still alive.
3395 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3397 FinalizeEntry *entry, *prev;
3399 if (no_finalize || !out_size || !out_array)
3403 for (i = 0; i < finalizable_hash_size; ++i) {
3405 for (entry = finalizable_hash [i]; entry;) {
3406 if (mono_object_domain (entry->object) == domain) {
3407 FinalizeEntry *next;
3408 /* remove and put in out_array */
3410 prev->next = entry->next;
3412 finalizable_hash [i] = entry->next;
/* the entry leaves the registered set; its object is handed to the caller */
3414 num_registered_finalizers--;
3415 out_array [count ++] = entry->object;
3416 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
/* out_array is full at this point; NOTE(review): the action taken is on lines not visible in this listing */
3418 if (count == out_size) {
3425 entry = entry->next;
/*
 * Resize finalizable_hash. The new bucket count is
 * g_spaced_primes_closest (num_registered_finalizers); every entry is moved
 * to the head of its new bucket (mono_object_hash of the object modulo the
 * new size), then the old bucket array is freed and replaced.
 * NOTE(review): the visible code does not clear new_hash itself, so it
 * relies on get_internal_mem () returning zeroed memory - confirm. The line
 * that saves entry->next into next is not visible in this listing.
 */
3433 rehash_fin_table (void)
3437 FinalizeEntry **new_hash;
3438 FinalizeEntry *entry, *next;
3439 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3441 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3442 for (i = 0; i < finalizable_hash_size; ++i) {
3443 for (entry = finalizable_hash [i]; entry; entry = next) {
3444 hash = mono_object_hash (entry->object) % new_size;
3446 entry->next = new_hash [hash];
3447 new_hash [hash] = entry;
3450 free_internal_mem (finalizable_hash);
3451 finalizable_hash = new_hash;
3452 finalizable_hash_size = new_size;
/*
 * Register, update or remove the finalization entry of @obj. The table is
 * rehashed when the entry count reaches twice the bucket count. If an entry
 * for @obj already exists, the visible code either stores the new
 * @user_data in it or unlinks and frees it (presumably the latter when
 * @user_data is NULL - TODO confirm); otherwise a new entry is allocated and
 * pushed at the head of the bucket.
 * NOTE(review): locking, the branch conditions inside the lookup loop and
 * the early returns are on lines not visible in this listing.
 */
3456 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3458 FinalizeEntry *entry, *prev;
3462 hash = mono_object_hash (obj);
3464 if (num_registered_finalizers >= finalizable_hash_size * 2)
3465 rehash_fin_table ();
/* reduce to a bucket index only after a possible rehash changed the table size */
3466 hash %= finalizable_hash_size;
3468 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3469 if (entry->object == obj) {
3471 entry->data = user_data;
3473 /* remove from the list */
3475 prev->next = entry->next;
3477 finalizable_hash [hash] = entry->next;
3478 num_registered_finalizers--;
3479 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3480 free_internal_mem (entry);
3488 /* request to deregister, but already out of the list */
/* no existing entry: create one and link it at the head of the bucket */
3492 entry = get_internal_mem (sizeof (FinalizeEntry));
3493 entry->object = obj;
3494 entry->data = user_data;
3495 entry->next = finalizable_hash [hash];
3496 finalizable_hash [hash] = entry;
3497 num_registered_finalizers++;
3498 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
/*
 * Resize disappearing_link_hash. The new bucket count is
 * g_spaced_primes_closest (num_disappearing_links); entries are re-bucketed
 * by mono_aligned_addr_hash of their link address (entry->data), then the
 * old bucket array is freed and replaced.
 * NOTE(review): the visible code does not clear new_hash itself, so it
 * relies on get_internal_mem () returning zeroed memory - confirm. The line
 * that saves entry->next into next is not visible in this listing.
 */
3503 rehash_dislink (void)
3507 FinalizeEntry **new_hash;
3508 FinalizeEntry *entry, *next;
3509 int new_size = g_spaced_primes_closest (num_disappearing_links);
3511 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3512 for (i = 0; i < disappearing_link_hash_size; ++i) {
3513 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3514 hash = mono_aligned_addr_hash (entry->data) % new_size;
3516 entry->next = new_hash [hash];
3517 new_hash [hash] = entry;
3520 free_internal_mem (disappearing_link_hash);
3521 disappearing_link_hash = new_hash;
3522 disappearing_link_hash_size = new_size;
/*
 * Register, retarget or remove the disappearing link stored at @link.
 * Entries are keyed by the link address. An existing entry for @link is
 * either unlinked and freed (a NULL @obj means remove, per the comment
 * below) or has its target object replaced by @obj; when no entry exists a
 * new one is allocated and pushed at the head of the bucket.
 * NOTE(review): locking, the rehash call, the store of @link into the new
 * entry and the early returns are on lines not visible in this listing.
 */
3526 mono_gc_register_disappearing_link (MonoObject *obj, void *link)
3528 FinalizeEntry *entry, *prev;
3532 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3534 /* FIXME: add check that link is not in the heap */
3535 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3536 entry = disappearing_link_hash [hash];
3538 for (; entry; entry = entry->next) {
3539 /* link already added */
3540 if (link == entry->data) {
3541 /* NULL obj means remove */
3544 prev->next = entry->next;
3546 disappearing_link_hash [hash] = entry->next;
3547 num_disappearing_links--;
3548 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3549 free_internal_mem (entry);
3551 entry->object = obj; /* we allow the change of object */
3558 entry = get_internal_mem (sizeof (FinalizeEntry));
3559 entry->object = obj;
3561 entry->next = disappearing_link_hash [hash];
3562 disappearing_link_hash [hash] = entry;
3563 num_disappearing_links++;
3564 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
/*
 * Drain fin_ready_list: each entry is popped from the head, its callback
 * (stored in entry->data) is invoked as callback (obj, NULL) and the entry
 * is released with free_internal_mem ().
 * NOTE(review): locking around the list manipulation and the return value
 * are on lines not visible in this listing.
 */
3569 mono_gc_invoke_finalizers (void)
3571 FinalizeEntry *entry;
3574 /* FIXME: batch to reduce lock contention */
3575 while (fin_ready_list) {
3577 entry = fin_ready_list;
3579 fin_ready_list = entry->next;
3580 num_ready_finalizers--;
3581 obj = entry->object;
3582 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
/* entry->data is the function pointer supplied at registration time */
3586 void (*callback)(void *, void*) = entry->data;
3588 obj = entry->object;
3590 /* the object is on the stack so it is pinned */
3591 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3592 callback (obj, NULL);
3593 free_internal_mem (entry);
/* Returns non-zero when fin_ready_list is not empty, i.e. finalizers are waiting to be invoked. */
3600 mono_gc_pending_finalizers (void)
3602 return fin_ready_list != NULL;
3605 /* Negative value to remove */
/*
 * Add @value to the global memory_pressure counter. The update is a plain
 * read-modify-write in the visible code (see the FIXME below).
 * NOTE(review): any locking would be on lines not visible in this listing.
 */
3607 mono_gc_add_memory_pressure (gint64 value)
3609 /* FIXME: Use interlocked functions */
3611 memory_pressure += value;
3616 * ######################################################################
3617 * ######## registered roots support
3618 * ######################################################################
/*
 * Body of the roots hash resize routine (its header line is not visible in
 * this listing). The new bucket count is
 * g_spaced_primes_closest (num_roots_entries); records are re-bucketed by
 * mono_aligned_addr_hash of their start_root, then the old bucket array is
 * freed and replaced.
 * NOTE(review): the visible code does not clear new_hash itself, so it
 * relies on get_internal_mem () returning zeroed memory - confirm.
 */
3626 RootRecord **new_hash;
3627 RootRecord *entry, *next;
3628 int new_size = g_spaced_primes_closest (num_roots_entries);
3630 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3631 for (i = 0; i < roots_hash_size; ++i) {
3632 for (entry = roots_hash [i]; entry; entry = next) {
3633 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3635 entry->next = new_hash [hash];
3636 new_hash [hash] = entry;
3639 free_internal_mem (roots_hash);
3640 roots_hash = new_hash;
3641 roots_hash_size = new_size;
3645 * We do not coalesce roots.
/*
 * Register the memory range [start, start + size) as a GC root described by
 * @descr. Roots are keyed by their start address: when a record with the
 * same start already exists its end and descriptor are overwritten and
 * roots_size is adjusted; otherwise a new RootRecord is allocated and pushed
 * at the head of its bucket.
 * NOTE(review): locking, the rehash call, the NULL check of the allocation,
 * the addition of the new size to roots_size and the return values are on
 * lines not visible in this listing.
 */
3648 mono_gc_register_root (char *start, size_t size, void *descr)
3650 RootRecord *new_root;
3651 unsigned int hash = mono_aligned_addr_hash (start);
3653 if (num_roots_entries >= roots_hash_size * 2)
/* reduce to a bucket index only after a possible resize of the table */
3655 hash %= roots_hash_size;
3656 for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) {
3657 /* we allow changing the size and the descriptor (for thread statics etc) */
3658 if (new_root->start_root == start) {
3659 size_t old_size = new_root->end_root - new_root->start_root;
3660 new_root->end_root = new_root->start_root + size;
3661 new_root->root_desc = (mword)descr;
3663 roots_size -= old_size;
3668 new_root = get_internal_mem (sizeof (RootRecord));
3670 new_root->start_root = start;
3671 new_root->end_root = new_root->start_root + size;
3672 new_root->root_desc = (mword)descr;
3674 num_roots_entries++;
3675 new_root->next = roots_hash [hash];
3676 roots_hash [hash] = new_root;
3677 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * Remove the root record whose start address equals @addr: the record is
 * unlinked from its bucket, roots_size and num_roots_entries are reduced
 * and the record is freed with free_internal_mem ().
 * NOTE(review): locking, the loop over the bucket chain and the maintenance
 * of prev are on lines not visible in this listing.
 */
3687 mono_gc_deregister_root (char* addr)
3689 RootRecord *tmp, *prev = NULL;
3690 unsigned int hash = mono_aligned_addr_hash (addr);
3692 hash %= roots_hash_size;
3693 tmp = roots_hash [hash];
3695 if (tmp->start_root == (char*)addr) {
3697 prev->next = tmp->next;
3699 roots_hash [hash] = tmp->next;
3700 roots_size -= (tmp->end_root - tmp->start_root);
3701 num_roots_entries--;
3702 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
3703 free_internal_mem (tmp);
3713 * ######################################################################
3714 * ######## Thread handling (stop/start code)
3715 * ######################################################################
3718 /* eventually share with MonoThread? */
/*
 * Per-thread record kept by the GC; records are stored in thread_table.
 * NOTE(review): some members used elsewhere in the file (signal,
 * stack_start, stack_end) and the closing brace are on lines not visible in
 * this listing.
 */
3719 typedef struct _SgenThreadInfo SgenThreadInfo;
3721 struct _SgenThreadInfo {
/* next record in the same thread_table bucket */
3722 SgenThreadInfo *next;
/* thread identifier, compared with ARCH_THREAD_EQUALS */
3723 ARCH_THREAD_TYPE id;
3724 unsigned int stop_count; /* to catch duplicate signals */
/* presumably the addresses of the thread's own tlab_* variables - TODO confirm */
3729 char **tlab_next_addr;
3730 char **tlab_start_addr;
3731 char **tlab_temp_end_addr;
3732 char **tlab_real_end_addr;
/* the thread's remembered set; stored by suspend_handler () from remembered_set */
3733 RememberedSet *remset;
/*
 * Thread hash table: THREAD_HASH_SIZE buckets of SgenThreadInfo chains.
 * HASH_PTHREAD_T converts the id to unsigned int, drops the low four bits
 * and multiplies by the constant 2654435761u; callers reduce the result
 * modulo THREAD_HASH_SIZE.
 */
3736 /* FIXME: handle large/small config */
3737 #define THREAD_HASH_SIZE 11
3738 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
3740 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
/* State used by the signal-based stop/restart of the world. */
3742 #if USE_SIGNAL_BASED_START_STOP_WORLD
/* posted by the signal handlers, waited on in thread_handshake () */
3744 static sem_t suspend_ack_semaphore;
/* incremented at each stop of the world */
3745 static unsigned int global_stop_count = 0;
3746 static int suspend_signal_num = SIGPWR;
3747 static int restart_signal_num = SIGXCPU;
/* mask passed to sigsuspend () while a thread is parked in suspend_handler () */
3748 static sigset_t suspend_signal_mask;
/* register save area of the current thread, scanned by pin_thread_data () */
3749 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
/*
 * thread_info_lookup: find the SgenThreadInfo registered for thread @id by
 * walking the bucket selected with HASH_PTHREAD_T (id) % THREAD_HASH_SIZE.
 * The loop ends when the chain is exhausted or an entry with a matching id
 * is found.
 * NOTE(review): the loop body and the return statement are on lines not
 * visible in this listing; presumably NULL is returned when not found.
 */
3751 /* LOCKING: assumes the GC lock is held */
3752 static SgenThreadInfo*
3753 thread_info_lookup (ARCH_THREAD_TYPE id)
3755 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3756 SgenThreadInfo *info;
3758 info = thread_table [hash];
3759 while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
/*
 * Record the current thread's stack position and registers: stack_start is
 * set to the aligned address of the local ptr and ARCH_STORE_REGS is invoked
 * on ptr, which points at cur_thread_regs.
 * NOTE(review): the @start argument is not used in the visible lines, and
 * the result of thread_info_lookup () is dereferenced without a NULL check.
 */
3766 update_current_thread_stack (void *start)
3768 void *ptr = cur_thread_regs;
3769 SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
3770 info->stack_start = align_pointer (&ptr);
3771 ARCH_STORE_REGS (ptr);
/*
 * Map @signum to a description for the debug log; used by
 * thread_handshake (). The suspend and restart signals are recognised.
 * NOTE(review): the returned strings are on lines not visible in this listing.
 */
3775 signal_desc (int signum)
3777 if (signum == suspend_signal_num)
3779 if (signum == restart_signal_num)
/*
 * thread_handshake: send @signum with pthread_kill () to every thread in
 * thread_table except the calling one, then wait on suspend_ack_semaphore
 * once per counted thread. sem_wait () is retried while it fails with EINTR;
 * any other failure is reported through g_error ().
 * NOTE(review): the initialisation and increment of count, the handling of
 * a failed pthread_kill () and the return statement are on lines not
 * visible in this listing.
 */
3784 /* LOCKING: assumes the GC lock is held */
3786 thread_handshake (int signum)
3788 int count, i, result;
3789 SgenThreadInfo *info;
3790 pthread_t me = pthread_self ();
3793 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3794 for (info = thread_table [i]; info; info = info->next) {
3795 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
/* never signal the thread that is driving the handshake */
3796 if (ARCH_THREAD_EQUALS (info->id, me)) {
3797 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
3800 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
3802 result = pthread_kill (info->id, signum);
3804 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
3807 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
3813 for (i = 0; i < count; ++i) {
3814 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
3815 if (errno != EINTR) {
3816 g_error ("sem_wait ()");
/*
 * suspend_handler: handler for the suspend signal, run in the thread being
 * stopped. It publishes the thread's remembered set and stack start in its
 * SgenThreadInfo, acknowledges through suspend_ack_semaphore, parks in
 * sigsuspend () until info->signal equals the restart signal and then
 * acknowledges again.
 * NOTE(review): the reset of info->signal before the wait loop and the
 * restoration of errno from old_errno are on lines not visible in this
 * listing.
 */
3823 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
3825 suspend_handler (int sig)
3827 SgenThreadInfo *info;
3830 int old_errno = errno;
3832 id = pthread_self ();
3833 info = thread_info_lookup (id);
3834 stop_count = global_stop_count;
3835 /* duplicate signal */
/* the leading 0 makes this check always false */
3836 if (0 && info->stop_count == stop_count) {
3840 /* update the remset info in the thread data structure */
3841 info->remset = remembered_set;
3843 * this includes the register values that the kernel put on the stack.
3844 * Write arch-specific code to only push integer regs and a more accurate
/* the address of a local of this handler is used as the stack start */
3847 info->stack_start = align_pointer (&id);
3849 /* notify the waiting thread */
3850 sem_post (&suspend_ack_semaphore);
3851 info->stop_count = stop_count;
3853 /* wait until we receive the restart signal */
3856 sigsuspend (&suspend_signal_mask);
/* info->signal is set by restart_handler () */
3857 } while (info->signal != restart_signal_num);
3859 /* notify the waiting thread */
3860 sem_post (&suspend_ack_semaphore);
/*
 * Handler for the restart signal: marks the current thread's record with
 * restart_signal_num, which ends the sigsuspend () loop in
 * suspend_handler ().
 * NOTE(review): the restoration of errno from old_errno is on a line not
 * visible in this listing.
 */
3866 restart_handler (int sig)
3868 SgenThreadInfo *info;
3869 int old_errno = errno;
3871 info = thread_info_lookup (pthread_self ());
3872 info->signal = restart_signal_num;
/*
 * Pause-time bookkeeping followed by the body of the stop-the-world routine
 * (its header line is not visible in this listing). The body bumps
 * global_stop_count, records the start time in stop_world_time and suspends
 * the other threads through thread_handshake (suspend_signal_num).
 */
/* time at which the world was last stopped; read by restart_world () */
3877 static TV_DECLARE (stop_world_time);
/* longest pause observed so far, in microseconds */
3878 static unsigned long max_pause_usec = 0;
3880 /* LOCKING: assumes the GC lock is held */
3886 global_stop_count++;
3887 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
3888 TV_GETTIME (stop_world_time);
3889 count = thread_handshake (suspend_signal_num);
3890 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
/*
 * restart_world: resume the threads stopped earlier by sending them the
 * restart signal through thread_handshake (), then compute the pause length
 * since stop_world_time and update max_pause_usec.
 * NOTE(review): locals and the return statement are on lines not visible in
 * this listing.
 */
3894 /* LOCKING: assumes the GC lock is held */
3896 restart_world (void)
3899 TV_DECLARE (end_sw);
3902 count = thread_handshake (restart_signal_num);
3903 TV_GETTIME (end_sw);
3904 usec = TV_ELAPSED (stop_world_time, end_sw);
3905 max_pause_usec = MAX (usec, max_pause_usec);
3906 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
3910 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
3913 * Identify objects pinned in a thread stack and its registers.
/*
 * Conservatively pin the objects in [start_nursery, end_nursery) that are
 * referenced from thread stacks: for every registered thread the range
 * stack_start..stack_end is scanned, and finally the saved registers of the
 * current thread (cur_thread_regs) are scanned too.
 * NOTE(review): the condition under which a thread is skipped as dead is on
 * a line not visible in this listing.
 */
3916 pin_thread_data (void *start_nursery, void *end_nursery)
3919 SgenThreadInfo *info;
3921 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3922 for (info = thread_table [i]; info; info = info->next) {
3924 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3927 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %zd\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3928 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
3931 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n"));
3932 conservatively_pin_objects_from ((void*)cur_thread_regs, (void*)(cur_thread_regs + ARCH_NUM_REGS), start_nursery, end_nursery);
/*
 * Debugging helper: report, at debug level 0, every word on any registered
 * thread's stack whose value points into [obj, obj + size).
 * NOTE(review): the increment of start inside the loop is on a line not
 * visible in this listing; registers are not checked (see the FIXME).
 */
3936 find_pinning_ref_from_thread (char *obj, size_t size)
3939 SgenThreadInfo *info;
3940 char *endobj = obj + size;
3942 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3943 for (info = thread_table [i]; info; info = info->next) {
3944 char **start = (char**)info->stack_start;
3947 while (start < (char**)info->stack_end) {
/* interior pointers count as well */
3948 if (*start >= obj && *start < endobj) {
3949 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, (gpointer)info->id, start, info->stack_start, info->stack_end));
3955 /* FIXME: check register */
/*
 * ptr_in_heap: range test of @ptr against [lowest_heap_address,
 * highest_heap_address). As the FIXME notes, no finer check is made.
 * NOTE(review): the return statements are on lines not visible in this
 * listing.
 */
3958 /* return TRUE if ptr points inside the managed heap */
3960 ptr_in_heap (void* ptr)
3962 mword p = (mword)ptr;
3963 if (p < lowest_heap_address || p >= highest_heap_address)
3965 /* FIXME: more checks */
/*
 * Process one remembered-set entry starting at @p. The entry type is taken
 * from the bits of *p selected by REMSET_TYPE_MASK; the remaining bits give
 * the address the entry refers to. Addresses inside the nursery or outside
 * the heap are skipped. For a single location the slot is updated with
 * copy_object (); when @global is FALSE and the new value still points into
 * the nursery the slot is added to the global remset. The other visible
 * cases update a run of count slots the same way, scan a whole object with
 * scan_object (), or scan a value type with scan_vtype ().
 * NOTE(review): the case labels other than REMSET_LOCATION, the reads of
 * count and desc and the return statements are on lines not visible in this
 * listing; presumably the function returns the address of the next entry.
 */
3970 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
3976 /* FIXME: exclude stack locations */
3977 switch ((*p) & REMSET_TYPE_MASK) {
3978 case REMSET_LOCATION:
3980 if (((void*)ptr < start_nursery || (void*)ptr >= end_nursery) && ptr_in_heap (ptr)) {
3981 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3982 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
/* the target is still in the nursery, so the slot has to be remembered globally */
3983 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3984 add_to_global_remset (ptr);
3986 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
3990 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
3991 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
3994 while (count-- > 0) {
3995 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3996 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
3997 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3998 add_to_global_remset (ptr);
4003 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4004 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4006 scan_object (*ptr, start_nursery, end_nursery);
4009 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4010 if (((void*)ptr >= start_nursery && (void*)ptr < end_nursery) || !ptr_in_heap (ptr))
4013 scan_vtype ((char*)ptr, desc, start_nursery, end_nursery);
4016 g_assert_not_reached ();
/*
 * scan_from_remsets: feeds every entry of the global remembered set and of
 * each registered thread's remembered sets to handle_remset ().
 * Global entries are passed with global == TRUE, per-thread ones with FALSE.
 */
4022 scan_from_remsets (void *start_nursery, void *end_nursery)
4025 SgenThreadInfo *info;
4026 RememberedSet *remset, *next;
4029 /* the global one */
4030 for (remset = global_remset; remset; remset = remset->next) {
4031 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4032 for (p = remset->data; p < remset->store_next;) {
4033 p = handle_remset (p, start_nursery, end_nursery, TRUE);
4036 /* the per-thread ones */
4037 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4038 for (info = thread_table [i]; info; info = info->next) {
4039 for (remset = info->remset; remset; remset = next) {
4040 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4041 for (p = remset->data; p < remset->store_next;) {
4042 p = handle_remset (p, start_nursery, end_nursery, FALSE);
/* Once scanned, a per-thread set is emptied and unlinked; `next` is saved first because the link is cleared. */
4044 remset->store_next = remset->data;
4045 next = remset->next;
4046 remset->next = NULL;
/* Only the list head (info->remset) is kept; every other buffer goes back to the internal allocator. */
4047 if (remset != info->remset) {
4048 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4049 free_internal_mem (remset);
4057 * Clear the info in the remembered sets: we're doing a major collection, so
4058 * the per-thread ones are not needed and the global ones will be reconstructed
/*
 * clear_remsets: empties the global and all per-thread remembered sets without
 * scanning them. In each list the head buffer is reset and kept, and every
 * other buffer is released with free_internal_mem ().
 */
4062 clear_remsets (void)
4065 SgenThreadInfo *info;
4066 RememberedSet *remset, *next;
4068 /* the global list */
4069 for (remset = global_remset; remset; remset = next) {
4070 remset->store_next = remset->data;
/* Save the successor before the link is cleared. */
4071 next = remset->next;
4072 remset->next = NULL;
4073 if (remset != global_remset) {
4074 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4075 free_internal_mem (remset);
4078 /* the per-thread ones */
4079 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4080 for (info = thread_table [i]; info; info = info->next) {
4081 for (remset = info->remset; remset; remset = next) {
4082 remset->store_next = remset->data;
4083 next = remset->next;
4084 remset->next = NULL;
4085 if (remset != info->remset) {
4086 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
4087 free_internal_mem (remset);
4095 * Clear the thread local TLAB variables for all threads.
/*
 * For every thread in thread_table, writes NULL through the four pointers
 * recorded at registration time (tlab_start_addr, tlab_next_addr,
 * tlab_temp_end_addr, tlab_real_end_addr), i.e. into that thread's own
 * TLAB variables.
 */
4100 SgenThreadInfo *info;
4103 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4104 for (info = thread_table [i]; info; info = info->next) {
4105 /* A new TLAB will be allocated when the thread does its first allocation */
4106 *info->tlab_start_addr = NULL;
4107 *info->tlab_next_addr = NULL;
4108 *info->tlab_temp_end_addr = NULL;
4109 *info->tlab_real_end_addr = NULL;
4115 * Find the tlab_next value of the TLAB which contains ADDR.
/*
 * find_tlab_next_from_address: searches all registered threads for one whose
 * used TLAB range [*tlab_start_addr, *tlab_next_addr) contains ADDR and
 * returns that thread's current tlab_next value.
 */
4118 find_tlab_next_from_address (char *addr)
4120 SgenThreadInfo *info;
4123 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4124 for (info = thread_table [i]; info; info = info->next) {
4125 if (addr >= *info->tlab_start_addr && addr < *info->tlab_next_addr)
4126 return *info->tlab_next_addr;
4133 /* LOCKING: assumes the GC lock is held */
4134 static SgenThreadInfo*
/*
 * gc_register_current_thread: allocates a SgenThreadInfo for the calling
 * thread with malloc (), fills in its id, stack end and the addresses of the
 * thread's TLAB variables, links it into thread_table and gives it an initial
 * remembered set of DEFAULT_REMSET_SIZE entries.
 * ADDR is an address on the caller's stack; it is used only by the fallback
 * that derives the stack end when no pthread stack query is available.
 */
4135 gc_register_current_thread (void *addr)
4138 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
4141 info->id = ARCH_GET_THREAD ();
4142 info->stop_count = -1;
4145 info->stack_start = NULL;
/* Record where this thread's TLAB variables live so other code can reach them through info. */
4146 info->tlab_start_addr = &tlab_start;
4147 info->tlab_next_addr = &tlab_next;
4148 info->tlab_temp_end_addr = &tlab_temp_end;
4149 info->tlab_real_end_addr = &tlab_real_end;
4151 tlab_next_addr = &tlab_next;
4153 /* try to get it with attributes first */
4154 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
4158 pthread_attr_t attr;
4159 pthread_getattr_np (pthread_self (), &attr);
4160 pthread_attr_getstack (&attr, &sstart, &size);
/* stack_end is the highest address of the stack: reported base plus size. */
4161 info->stack_end = (char*)sstart + size;
4162 pthread_attr_destroy (&attr);
4164 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
4165 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
4168 /* FIXME: we assume the stack grows down */
/* Fallback: round ADDR up to the next 4096-byte boundary and use that as the stack end. */
4169 gsize stack_bottom = (gsize)addr;
4170 stack_bottom += 4095;
4171 stack_bottom &= ~4095;
4172 info->stack_end = (char*)stack_bottom;
4176 /* hash into the table */
4177 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
4178 info->next = thread_table [hash];
4179 thread_table [hash] = info;
/* The new remembered set is also published in the remembered_set variable and under remembered_set_key. */
4181 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
4182 pthread_setspecific (remembered_set_key, remembered_set);
4183 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
/*
 * unregister_current_thread: finds the calling thread's SgenThreadInfo in its
 * thread_table bucket, unlinks it from the chain and releases the thread's
 * remembered-set buffers with free_internal_mem ().
 */
4188 unregister_current_thread (void)
4191 SgenThreadInfo *prev = NULL;
4193 RememberedSet *rset;
4194 ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
4196 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
4197 p = thread_table [hash];
/* NOTE(review): p is dereferenced here, so the bucket is assumed to be non-empty - confirm a check precedes this. */
4199 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
4200 while (!ARCH_THREAD_EQUALS (p->id, id)) {
/* Unlink: either the bucket head or the predecessor's link is redirected past p. */
4205 thread_table [hash] = p->next;
4207 prev->next = p->next;
4210 /* FIXME: transfer remsets if any */
4212 RememberedSet *next = rset->next;
4213 free_internal_mem (rset);
/*
 * unregister_thread: destructor registered for remembered_set_key via
 * pthread_key_create (); it unregisters the calling thread.
 */
4220 unregister_thread (void *k)
4223 unregister_current_thread ();
/*
 * mono_gc_register_thread: looks up the calling thread with
 * thread_info_lookup () and registers it through
 * gc_register_current_thread (), passing BASEPTR as the stack address.
 * Returns non-zero when a SgenThreadInfo exists for the thread afterwards.
 */
4228 mono_gc_register_thread (void *baseptr)
4230 SgenThreadInfo *info;
4232 info = thread_info_lookup (ARCH_GET_THREAD ());
4234 info = gc_register_current_thread (baseptr);
4236 return info != NULL;
4239 #if USE_PTHREAD_INTERCEPT
4241 #undef pthread_create
4243 #undef pthread_detach
/*
 * Start-up record handed from mono_gc_pthread_create () to gc_start_thread ():
 * the user's start routine, plus the `arg` and `registered` members those two
 * functions access.
 */
4246 void *(*start_routine) (void *);
4250 } SgenThreadStartInfo;
/*
 * gc_start_thread: entry point given to pthread_create () by
 * mono_gc_pthread_create (). It registers the new thread with the GC,
 * signals the creator through the `registered` semaphore and then runs the
 * user's start routine with the user's argument.
 */
4253 gc_start_thread (void *arg)
4255 SgenThreadStartInfo *start_info = arg;
4256 SgenThreadInfo* info;
/* Copy what is needed out of start_info before the creator is released by sem_post (). */
4257 void *t_arg = start_info->arg;
4258 void *(*start_func) (void*) = start_info->start_routine;
/* The address of the local `result` serves as the stack address for registration. */
4262 info = gc_register_current_thread (&result);
4264 sem_post (&(start_info->registered));
4265 result = start_func (t_arg);
4267 * this is done by the pthread key dtor
4269 unregister_current_thread ();
/*
 * mono_gc_pthread_create: pthread_create () wrapper that starts the thread in
 * gc_start_thread () so that it is registered with the GC before START_ROUTINE
 * runs. The caller blocks on the `registered` semaphore until the new thread
 * has posted it.
 */
4277 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
4279 SgenThreadStartInfo *start_info;
4282 start_info = malloc (sizeof (SgenThreadStartInfo));
4285 sem_init (&(start_info->registered), 0, 0);
4286 start_info->arg = arg;
4287 start_info->start_routine = start_routine;
4289 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
/* sem_wait () is retried for as long as it reports failure. */
4291 while (sem_wait (&(start_info->registered)) != 0) {
4292 /*if (EINTR != errno) ABORT("sem_wait failed"); */
4295 sem_destroy (&(start_info->registered));
/* mono_gc_pthread_join: forwards directly to pthread_join () and returns its result. */
4301 mono_gc_pthread_join (pthread_t thread, void **retval)
4303 return pthread_join (thread, retval);
/* mono_gc_pthread_detach: forwards directly to pthread_detach () and returns its result. */
4307 mono_gc_pthread_detach (pthread_t thread)
4309 return pthread_detach (thread);
4312 #endif /* USE_PTHREAD_INTERCEPT */
4315 * ######################################################################
4316 * ######## Write barriers
4317 * ######################################################################
4320 static RememberedSet*
/*
 * alloc_remset: obtains from get_internal_mem () a RememberedSet header
 * followed by room for SIZE pointer-sized entries, and initialises it as
 * empty (store_next at the start of data, end_set SIZE entries further).
 * ID is only used in the debug message visible here.
 */
4321 alloc_remset (int size, gpointer id) {
4322 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
4323 res->store_next = res->data;
4324 res->end_set = res->data + size;
4326 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
4331 * Note: the write barriers first do the needed GC work and then do the actual store:
4332 * this way the value is visible to the conservative GC scan after the write barrier
4333 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
4334 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
4335 * happens when we need to record which pointers contain references to the new generation.
4336 * The write barrier will be executed, but the pointer is still not stored.
/*
 * mono_gc_wbarrier_set_field: stores VALUE into FIELD_PTR. When FIELD_PTR is
 * outside the nursery its address is first appended to the thread's
 * remembered set (a REMSET_LOCATION entry: the bare address).
 */
4339 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
/* Slot inside the nursery: plain store (an early exit after it is assumed). */
4342 if ((char*)field_ptr >= nursery_start && (char*)field_ptr < nursery_real_end) {
4343 *(void**)field_ptr = value;
4346 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
4347 rs = remembered_set;
/* Room left in the current buffer: record the slot, then perform the store. */
4348 if (rs->store_next < rs->end_set) {
4349 *(rs->store_next++) = (mword)field_ptr;
4350 *(void**)field_ptr = value;
/* Buffer full: chain a new buffer of the same capacity in front and make it the thread's current one. */
4353 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4354 rs->next = remembered_set;
4355 remembered_set = rs;
4356 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4357 *(rs->store_next++) = (mword)field_ptr;
4358 *(void**)field_ptr = value;
/*
 * mono_gc_wbarrier_set_arrayref: stores VALUE into the array slot SLOT_PTR,
 * recording the slot address in the thread's remembered set first when the
 * slot lies outside the nursery.
 */
4362 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
4364 RememberedSet *rs = remembered_set;
/* Slot inside the nursery: plain store (an early exit after it is assumed). */
4365 if ((char*)slot_ptr >= nursery_start && (char*)slot_ptr < nursery_real_end) {
4366 *(void**)slot_ptr = value;
4369 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
4370 if (rs->store_next < rs->end_set) {
4371 *(rs->store_next++) = (mword)slot_ptr;
4372 *(void**)slot_ptr = value;
/* Buffer full: allocate another of the same capacity, link it in front and use it. */
4375 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4376 rs->next = remembered_set;
4377 remembered_set = rs;
4378 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4379 *(rs->store_next++) = (mword)slot_ptr;
4380 *(void**)slot_ptr = value;
/*
 * mono_gc_wbarrier_arrayref_copy: records a run of COUNT array slots starting
 * at SLOT_PTR as a two-word entry: the address tagged with REMSET_RANGE,
 * followed by the count. No store is performed in the lines visible here.
 */
4384 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
4386 RememberedSet *rs = remembered_set;
4387 if ((char*)slot_ptr >= nursery_start && (char*)slot_ptr < nursery_real_end)
4389 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
/* Two words are needed, hence the "+ 1" in the capacity test. */
4390 if (rs->store_next + 1 < rs->end_set) {
4391 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4392 *(rs->store_next++) = count;
/* Buffer full: allocate another of the same capacity, link it in front and use it. */
4395 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4396 rs->next = remembered_set;
4397 remembered_set = rs;
4398 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4399 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4400 *(rs->store_next++) = count;
/*
 * mono_gc_wbarrier_generic_store: stores VALUE into the arbitrary location
 * PTR, recording PTR in the thread's remembered set first when PTR lies
 * outside the nursery.
 */
4404 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4406 RememberedSet *rs = remembered_set;
/* Location inside the nursery: plain store (an early exit after it is assumed). */
4407 if ((char*)ptr >= nursery_start && (char*)ptr < nursery_real_end) {
4408 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4409 *(void**)ptr = value;
4412 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", ptr));
4413 /* FIXME: ensure it is on the heap */
4414 if (rs->store_next < rs->end_set) {
4415 *(rs->store_next++) = (mword)ptr;
4416 *(void**)ptr = value;
/* Buffer full: allocate another of the same capacity, link it in front and use it. */
4419 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4420 rs->next = remembered_set;
4421 remembered_set = rs;
4422 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4423 *(rs->store_next++) = (mword)ptr;
4424 *(void**)ptr = value;
/*
 * mono_gc_wbarrier_value_copy: records the value-type destination DEST as a
 * two-word entry: the address tagged with REMSET_VTYPE, followed by
 * klass->gc_descr. SRC is not referenced in the lines visible here, and COUNT
 * only appears in the debug message.
 */
4428 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
4430 RememberedSet *rs = remembered_set;
4431 if ((char*)dest >= nursery_start && (char*)dest < nursery_real_end) {
4434 DEBUG (1, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
/* Two words are needed, hence the "+ 1" in the capacity test. */
4436 if (rs->store_next + 1 < rs->end_set) {
4437 *(rs->store_next++) = (mword)dest | REMSET_VTYPE;
4438 *(rs->store_next++) = (mword)klass->gc_descr;
/* Buffer full: allocate another of the same capacity, link it in front and use it. */
4441 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4442 rs->next = remembered_set;
4443 remembered_set = rs;
4444 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4445 *(rs->store_next++) = (mword)dest | REMSET_VTYPE;
4446 *(rs->store_next++) = (mword)klass->gc_descr;
4450 * mono_gc_wbarrier_object:
4452 * Write barrier to call when obj is the result of a clone or copy of an object.
/*
 * mono_gc_wbarrier_object: appends a single-word entry for OBJ, its address
 * tagged with REMSET_OBJECT, to the thread's remembered set. No nursery test
 * on OBJ appears in the lines visible here.
 */
4455 mono_gc_wbarrier_object (MonoObject* obj)
4457 RememberedSet *rs = remembered_set;
4458 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4459 if (rs->store_next < rs->end_set) {
4460 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
/* Buffer full: allocate another of the same capacity, link it in front and use it. */
4463 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4464 rs->next = remembered_set;
4465 remembered_set = rs;
4466 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4467 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4471 * ######################################################################
4472 * ######## Collector debugging
4473 * ######################################################################
4476 const char*descriptor_types [] = {
/*
 * describe_ptr: prints to stdout what can be determined about PTR: whether it
 * lies in the nursery or in one of the sections on section_list, and then the
 * vtable, class name, GC descriptor and descriptor type read from the object
 * header at PTR.
 */
4488 describe_ptr (char *ptr)
4490 GCMemSection *section;
4495 if ((ptr >= nursery_start) && (ptr < nursery_real_end)) {
4496 printf ("Pointer inside nursery.\n");
/* Otherwise look for a section whose data range contains the pointer. */
4498 for (section = section_list; section;) {
4499 if (ptr >= section->data && ptr < section->data + section->size)
4501 section = section->next;
4505 printf ("Pointer inside oldspace.\n");
4507 printf ("Pointer unknown.\n");
4512 // FIXME: Handle pointers to the inside of objects
4513 vtable = (MonoVTable*)LOAD_VTABLE (ptr);
4515 printf ("VTable: %p\n", vtable);
4516 if (vtable == NULL) {
4517 printf ("VTable is invalid (empty).\n");
/* A vtable address that falls inside the nursery is reported as invalid. */
4520 if (((char*)vtable >= nursery_start) && ((char*)vtable < nursery_real_end)) {
4521 printf ("VTable is invalid (points inside nursery).\n");
4524 printf ("Class: %s\n", vtable->klass->name);
4526 desc = ((GCVTable*)vtable)->desc;
4527 printf ("Descriptor: %lx\n", desc);
4530 printf ("Descriptor type: %d (%s)\n", type, descriptor_types [type]);
/*
 * find_in_remset_loc: examines the remembered-set entry at P and tests whether
 * ADDR is covered by it. The entry kind is selected by the REMSET_TYPE_MASK
 * bits of *P; the remaining bits are the address the entry refers to.
 * FOUND is the out-parameter find_in_remsets () passes in and tests; it is
 * presumably set on the lines following each containment test - confirm.
 * Callers assign the result back to their cursor, so the return value is the
 * position of the next entry.
 */
4534 find_in_remset_loc (mword *p, char *addr, gboolean *found)
4540 switch ((*p) & REMSET_TYPE_MASK) {
4541 case REMSET_LOCATION:
/* A location entry is the bare slot address, so it matches by equality. */
4542 if (*p == (mword)addr)
/* Run of `count` slots starting at ptr. */
4546 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4548 if ((void**)addr >= ptr && (void**)addr < ptr + count)
/* Whole object: the covered range is the object's size rounded up to ALLOC_ALIGN, counted in words. */
4552 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4553 count = safe_object_get_size ((MonoObject*)ptr);
4554 count += (ALLOC_ALIGN - 1);
/*
 * Round up by clearing the low bits. The mask has to be inverted: masking
 * with (ALLOC_ALIGN - 1) itself would keep only the low bits and shrink
 * the range to less than one alignment unit.
 */
4555 count &= ~(ALLOC_ALIGN - 1);
4556 count /= sizeof (mword);
4557 if ((void**)addr >= ptr && (void**)addr < ptr + count)
/* Value type described by a GC descriptor; only run-length descriptors are handled. */
4561 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
4564 switch (desc & 0x7) {
4565 case DESC_TYPE_RUN_LENGTH:
4566 OBJ_RUN_LEN_SIZE (skip_size, desc, ptr);
4567 /* The descriptor includes the size of MonoObject */
4568 skip_size -= sizeof (MonoObject);
4569 if ((void**)addr >= ptr && (void**)addr < ptr + (skip_size / sizeof (gpointer)))
4574 g_assert_not_reached ();
4579 g_assert_not_reached ();
4585 * Return whether ADDR occurs in the remembered sets
/*
 * find_in_remsets: runs find_in_remset_loc () over every entry of the global
 * remembered set and of each registered thread's remembered sets, collecting
 * the outcome in the local `found` flag. The remembered sets themselves are
 * not modified by the lines visible here.
 */
4588 find_in_remsets (char *addr)
4591 SgenThreadInfo *info;
4592 RememberedSet *remset;
4594 gboolean found = FALSE;
4596 /* the global one */
4597 for (remset = global_remset; remset; remset = remset->next) {
4598 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %zd\n", remset->data, remset->store_next, remset->store_next - remset->data));
4599 for (p = remset->data; p < remset->store_next;) {
4600 p = find_in_remset_loc (p, addr, &found);
4605 /* the per-thread ones */
4606 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
4607 for (info = thread_table [i]; info; info = info->next) {
4608 for (remset = info->remset; remset; remset = remset->next) {
4609 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %zd\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
4610 for (p = remset->data; p < remset->store_next;) {
4611 p = find_in_remset_loc (p, addr, &found);
/*
 * HANDLE_PTR(ptr,obj): per-reference hook for the *_FOREACH_PTR macros used by
 * check_remsets_for_area (). For a non-NULL slot whose value lies in
 * [nursery_start, nursery_next), it requires the slot address to be present in
 * the remembered sets; otherwise it logs the offending reference and aborts
 * through g_assert_not_reached ().
 */
4623 #define HANDLE_PTR(ptr,obj) do { \
4624 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
4625 if (!find_in_remsets ((char*)(ptr))) { \
4626 fprintf (gc_debug_file, "Oldspace->newspace reference %p at offset %zd in object %p (%s.%s) not found in remsets.\n", *(ptr), (char*)(ptr) - (char*)(obj), (obj), ((MonoObject*)(obj))->vtable->klass->name_space, ((MonoObject*)(obj))->vtable->klass->name); \
4627 g_assert_not_reached (); \
4633 * Check that each object reference inside the area which points into the nursery
4634 * can be found in the remembered sets.
4636 static void __attribute__((noinline))
/*
 * check_remsets_for_area: walks the objects laid out in [start, end). For each
 * object it derives the descriptor type, computes the object's aligned size
 * (skip_size) and expands the matching *_FOREACH_PTR macro, which applies
 * HANDLE_PTR to every reference slot of the object.
 */
4637 check_remsets_for_area (char *start, char *end)
4642 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
4644 new_obj_references = 0;
4645 obj_references_checked = 0;
4646 while (start < end) {
/* A NULL first word means there is no object header here: step over one pointer-sized word. */
4647 if (!*(void**)start) {
4648 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
4651 vt = (GCVTable*)LOAD_VTABLE (start);
4652 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
4654 MonoObject *obj = (MonoObject*)start;
4655 g_print ("found at %p (0x%lx): %s.%s\n", start, (long)vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
4659 if (type == DESC_TYPE_STRING) {
4660 STRING_SIZE (skip_size, start);
4664 } else if (type == DESC_TYPE_RUN_LENGTH) {
4665 OBJ_RUN_LEN_SIZE (skip_size, desc, start);
4666 g_assert (skip_size);
4667 OBJ_RUN_LEN_FOREACH_PTR (desc,start);
4671 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
/* Size = element size from the descriptor * length + array header, rounded up to ALLOC_ALIGN. */
4672 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
4673 skip_size *= mono_array_length ((MonoArray*)start);
4674 skip_size += sizeof (MonoArray);
4675 skip_size += (ALLOC_ALIGN - 1);
4676 skip_size &= ~(ALLOC_ALIGN - 1);
4677 OBJ_VECTOR_FOREACH_PTR (vt, start);
4678 if (((MonoArray*)start)->bounds) {
4679 /* account for the bounds */
4680 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
4685 } else if (type == DESC_TYPE_SMALL_BITMAP) {
4686 OBJ_BITMAP_SIZE (skip_size, desc, start);
4687 g_assert (skip_size);
4688 OBJ_BITMAP_FOREACH_PTR (desc,start);
4692 } else if (type == DESC_TYPE_LARGE_BITMAP) {
4693 skip_size = safe_object_get_size ((MonoObject*)start);
4694 skip_size += (ALLOC_ALIGN - 1);
4695 skip_size &= ~(ALLOC_ALIGN - 1);
4696 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
4700 } else if (type == DESC_TYPE_COMPLEX) {
4701 /* this is a complex object */
4702 skip_size = safe_object_get_size ((MonoObject*)start);
4703 skip_size += (ALLOC_ALIGN - 1);
4704 skip_size &= ~(ALLOC_ALIGN - 1);
4705 OBJ_COMPLEX_FOREACH_PTR (vt, start);
4709 } else if (type == DESC_TYPE_COMPLEX_ARR) {
4710 /* this is an array of complex structs */
4711 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
4712 skip_size *= mono_array_length ((MonoArray*)start);
4713 skip_size += sizeof (MonoArray);
4714 skip_size += (ALLOC_ALIGN - 1);
4715 skip_size &= ~(ALLOC_ALIGN - 1);
4716 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
4717 if (((MonoArray*)start)->bounds) {
4718 /* account for the bounds */
4719 skip_size += sizeof (MonoArrayBounds) * vt->klass->rank;
4731 * Perform consistency check of the heap.
4733 * Assumes the world is stopped.
/*
 * check_consistency: for the sections on section_list, runs
 * check_remsets_for_area () over the used part [data, next_data) of each
 * section; the test on MEMORY_ROLE_GEN0 singles out nursery sections
 * (presumably to skip them - the guarded statement is on the following line).
 */
4736 check_consistency (void)
4738 GCMemSection *section;
4740 // Need to add more checks
4741 // FIXME: Create a general heap enumeration function and use that
4743 DEBUG (1, fprintf (gc_debug_file, "Begin heap consistency check...\n"));
4745 // Check that oldspace->newspace pointers are registered with the collector
4746 for (section = section_list; section; section = section->next) {
4747 if (section->role == MEMORY_ROLE_GEN0)
4749 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
4750 check_remsets_for_area (section->data, section->next_data);
4753 DEBUG (1, fprintf (gc_debug_file, "Heap consistency check done.\n"));
4757 * ######################################################################
4758 * ######## Other mono public interface functions.
4759 * ######################################################################
/*
 * mono_gc_collect: triggers a collection. Generation 0 requests a nursery
 * collection via collect_nursery (0); any other value runs
 * major_collection ().
 */
4763 mono_gc_collect (int generation)
/* The address of the parameter is passed as a position on the current stack. */
4766 update_current_thread_stack (&generation);
4768 if (generation == 0) {
4769 collect_nursery (0);
4771 major_collection ();
4778 mono_gc_max_generation (void)
/*
 * mono_gc_collection_count: returns num_minor_gcs for generation 0 and
 * num_major_gcs for any other generation value.
 */
4784 mono_gc_collection_count (int generation)
4786 if (generation == 0)
4787 return num_minor_gcs;
4788 return num_major_gcs;
/*
 * mono_gc_get_used_size: computes an approximate used-memory figure as
 * los_memory_usage plus, for each section on section_list, the bytes between
 * section->data and section->next_data.
 */
4792 mono_gc_get_used_size (void)
4795 GCMemSection *section;
4797 tot = los_memory_usage;
4798 for (section = section_list; section; section = section->next) {
4799 /* this is approximate... */
4800 tot += section->next_data - section->data;
4802 /* FIXME: account for pinned objects */
4808 mono_gc_get_heap_size (void)
4814 mono_gc_disable (void)
4822 mono_gc_enable (void)
4830 mono_object_is_alive (MonoObject* o)
/*
 * mono_gc_get_generation: classifies OBJ by testing whether its address lies
 * in [nursery_start, nursery_real_end).
 */
4836 mono_gc_get_generation (MonoObject *obj)
4838 if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end)
4844 mono_gc_enable_events (void)
/* mono_gc_weak_link_add: registers LINK_ADDR as a disappearing link for OBJ. */
4849 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
4851 mono_gc_register_disappearing_link (obj, link_addr);
/*
 * mono_gc_weak_link_remove: calls the disappearing-link registration with a
 * NULL object for LINK_ADDR.
 */
4856 mono_gc_weak_link_remove (void **link_addr)
4858 mono_gc_register_disappearing_link (NULL, link_addr);
4863 mono_gc_weak_link_get (void **link_addr)
/*
 * mono_gc_make_descr_from_bitmap: builds a root descriptor from BITMAP. When
 * NUMBITS fits in one word after reserving ROOT_DESC_TYPE_SHIFT bits for the
 * type tag, the first bitmap word is shifted up and tagged with
 * ROOT_DESC_BITMAP; otherwise the root is treated conservatively.
 */
4869 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
4871 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
4872 mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT);
4875 /* conservative scanning */
4876 DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits));
/*
 * mono_gc_alloc_fixed: allocates SIZE zeroed bytes with calloc () and
 * registers the block as a GC root described by DESCR. A failed registration
 * is handled by the branch below.
 */
4881 mono_gc_alloc_fixed (size_t size, void *descr)
4883 /* FIXME: do a single allocation */
4884 void *res = calloc (1, size);
4887 if (!mono_gc_register_root (res, size, descr)) {
/* mono_gc_free_fixed: removes ADDR from the registered GC roots. */
4895 mono_gc_free_fixed (void* addr)
4897 mono_gc_deregister_root (addr);
/*
 * mono_gc_is_gc_thread: the result is whether thread_info_lookup () finds a
 * SgenThreadInfo for the calling thread.
 */
4902 mono_gc_is_gc_thread (void)
4906 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
/*
 * mono_gc_base_init: one-time collector initialisation. It parses the
 * MONO_GC_DEBUG environment variable, installs the suspend/restart signal
 * handlers, creates the global remembered set and the remembered_set_key
 * thread-specific key, and registers the calling thread.
 */
4912 mono_gc_base_init (void)
4916 struct sigaction sinfo;
4918 LOCK_INIT (gc_mutex);
4920 if (gc_initialized) {
4924 pagesize = mono_pagesize ();
4925 gc_debug_file = stderr;
/* MONO_GC_DEBUG is a comma-separated list: a debug level (optionally with a file name) or named options. */
4926 if ((env = getenv ("MONO_GC_DEBUG"))) {
4927 opts = g_strsplit (env, ",", -1);
4928 for (ptr = opts; ptr && *ptr; ptr ++) {
4930 if (opt [0] >= '0' && opt [0] <= '9') {
4931 gc_debug_level = atoi (opt);
/* The debug output file name gets the process id appended; stderr remains the fallback. */
4936 char *rf = g_strdup_printf ("%s.%d", opt, getpid ());
4937 gc_debug_file = fopen (rf, "wb");
4939 gc_debug_file = stderr;
4942 } else if (!strcmp (opt, "collect-before-allocs")) {
4943 collect_before_allocs = TRUE;
4944 } else if (!strcmp (opt, "check-at-minor-collections")) {
4945 consistency_check_at_minor_collection = TRUE;
4947 fprintf (stderr, "Invalid format for the MONO_GC_DEBUG env variable: '%s'\n", env);
4948 fprintf (stderr, "The format is: MONO_GC_DEBUG=[l[:filename]|<option>]+ where l is a debug level 0-9.\n");
4949 fprintf (stderr, "Valid options are: collect-before-allocs, check-at-minor-collections.\n");
4956 sem_init (&suspend_ack_semaphore, 0, 0);
/* All signals are blocked while either handler runs. */
4958 sigfillset (&sinfo.sa_mask);
/* NOTE(review): SA_SIGINFO is requested but the handlers are installed through sa_handler rather than sa_sigaction - confirm the handler signatures. */
4959 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
4960 sinfo.sa_handler = suspend_handler;
4961 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
4962 g_error ("failed sigaction");
4965 sinfo.sa_handler = restart_handler;
4966 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
4967 g_error ("failed sigaction");
/* suspend_signal_mask: every signal except the restart signal. */
4970 sigfillset (&suspend_signal_mask);
4971 sigdelset (&suspend_signal_mask, restart_signal_num);
4973 global_remset = alloc_remset (1024, NULL);
4974 global_remset->next = NULL;
/* unregister_thread is the destructor for the per-thread remembered-set key. */
4976 pthread_key_create (&remembered_set_key, unregister_thread);
4977 gc_initialized = TRUE;
/* The address of the local sinfo is passed as the stack address for registration. */
4979 mono_gc_register_thread (&sinfo);
4987 /* FIXME: Do this in the JIT, where specialized allocation sequences can be created
4988 * for each class. This is currently not easy to do, as it is hard to generate basic
4989 * blocks + branches, but it is easy with the linear IL codebase.
/*
 * create_allocator: builds, with the method-builder API, a managed "Alloc"
 * wrapper that takes a vtable and bump-allocates an object from the thread's
 * TLAB, calling mono_gc_alloc_obj () when the bump would pass tlab_temp_end.
 * Only ATYPE_NORMAL is accepted (asserted). The TLS offsets of tlab_next_addr
 * and tlab_temp_end must be available (asserted).
 */
4992 create_allocator (int atype)
4994 int tlab_next_addr_offset = -1;
4995 int tlab_temp_end_offset = -1;
4996 int p_var, size_var, tlab_next_addr_var, new_next_var;
4997 guint32 slowpath_branch;
4998 MonoMethodBuilder *mb;
5000 MonoMethodSignature *csig;
5001 static gboolean registered = FALSE;
5003 MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
5004 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5006 g_assert (tlab_next_addr_offset != -1);
5007 g_assert (tlab_temp_end_offset != -1);
5009 g_assert (atype == ATYPE_NORMAL);
5012 mono_register_jit_icall (mono_gc_alloc_obj, "mono_gc_alloc_obj", mono_create_icall_signature ("object ptr int"), FALSE);
/* Wrapper signature: one native-int parameter (the vtable), returning object. */
5016 csig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
5017 csig->ret = &mono_defaults.object_class->byval_arg;
5018 csig->params [0] = &mono_defaults.int_class->byval_arg;
5020 mb = mono_mb_new (mono_defaults.object_class, "Alloc", MONO_WRAPPER_ALLOC);
5021 size_var = mono_mb_add_local (mb, &mono_defaults.int32_class->byval_arg);
5022 /* size = vtable->klass->instance_size; */
5023 mono_mb_emit_ldarg (mb, 0);
5024 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoVTable, klass));
5025 mono_mb_emit_byte (mb, CEE_ADD);
5026 mono_mb_emit_byte (mb, CEE_LDIND_I);
5027 mono_mb_emit_icon (mb, G_STRUCT_OFFSET (MonoClass, instance_size));
5028 mono_mb_emit_byte (mb, CEE_ADD);
5029 /* FIXME: assert instance_size stays a 4 byte integer */
5030 mono_mb_emit_byte (mb, CEE_LDIND_U4);
5031 mono_mb_emit_stloc (mb, size_var);
5033 /* size += ALLOC_ALIGN - 1; */
5034 mono_mb_emit_ldloc (mb, size_var);
5035 mono_mb_emit_icon (mb, ALLOC_ALIGN - 1);
5036 mono_mb_emit_byte (mb, CEE_ADD);
5037 /* size &= ~(ALLOC_ALIGN - 1); */
5038 mono_mb_emit_icon (mb, ~(ALLOC_ALIGN - 1));
5039 mono_mb_emit_byte (mb, CEE_AND);
5040 mono_mb_emit_stloc (mb, size_var);
5043 * We need to modify tlab_next, but the JIT only supports reading, so we read
5044 * another tls var holding its address instead.
5047 /* tlab_next_addr (local) = tlab_next_addr (TLS var) */
5048 tlab_next_addr_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5049 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5050 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5051 mono_mb_emit_i4 (mb, tlab_next_addr_offset);
5052 mono_mb_emit_stloc (mb, tlab_next_addr_var);
5054 /* p = (void**)tlab_next; */
5055 p_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5056 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5057 mono_mb_emit_byte (mb, CEE_LDIND_I);
5058 mono_mb_emit_stloc (mb, p_var);
5060 /* new_next = (char*)p + size; */
5061 new_next_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
5062 mono_mb_emit_ldloc (mb, p_var);
5063 mono_mb_emit_ldloc (mb, size_var);
5064 mono_mb_emit_byte (mb, CEE_CONV_I);
5065 mono_mb_emit_byte (mb, CEE_ADD);
5066 mono_mb_emit_stloc (mb, new_next_var);
/* Note: tlab_next is overwritten with new_next before the bounds check below is emitted. */
5068 /* tlab_next = new_next */
5069 mono_mb_emit_ldloc (mb, tlab_next_addr_var);
5070 mono_mb_emit_ldloc (mb, new_next_var);
5071 mono_mb_emit_byte (mb, CEE_STIND_I);
5073 /* if (G_LIKELY (new_next < tlab_temp_end)) */
5074 mono_mb_emit_ldloc (mb, new_next_var);
5075 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5076 mono_mb_emit_byte (mb, CEE_MONO_TLS);
5077 mono_mb_emit_i4 (mb, tlab_temp_end_offset);
/* Despite its name, this branch is taken when the allocation fits: it jumps over the icall sequence below. */
5078 slowpath_branch = mono_mb_emit_short_branch (mb, MONO_CEE_BLT_UN_S);
5082 mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
5083 mono_mb_emit_byte (mb, CEE_MONO_NOT_TAKEN);
5085 /* FIXME: mono_gc_alloc_obj takes a 'size_t' as an argument, not an int32 */
5086 mono_mb_emit_ldarg (mb, 0);
5087 mono_mb_emit_ldloc (mb, size_var);
5088 mono_mb_emit_icall (mb, mono_gc_alloc_obj);
5089 mono_mb_emit_byte (mb, CEE_RET);
/* Branch target: the code that stores the vtable into the new object and returns it. */
5092 mono_mb_patch_short_branch (mb, slowpath_branch);
5094 /* FIXME: Memory barrier */
/* *p = vtable; */
5097 mono_mb_emit_ldloc (mb, p_var);
5098 mono_mb_emit_ldarg (mb, 0);
5099 mono_mb_emit_byte (mb, CEE_STIND_I);
/* return p; */
5102 mono_mb_emit_ldloc (mb, p_var);
5103 mono_mb_emit_byte (mb, CEE_RET);
5105 res = mono_mb_create_method (mb, csig, 8);
/* Locals are not zero-initialised in the generated method. */
5107 mono_method_get_header (res)->init_locals = FALSE;
/* One lazily created allocator method per allocator type, indexed by atype (see mono_gc_get_managed_allocator_by_type). */
5111 static MonoMethod* alloc_method_cache [ATYPE_NUM];
5114 * Generate an allocator method implementing the fast path of mono_gc_alloc_obj ().
5115 * The signature of the called method is:
5116 * object allocate (MonoVTable *vtable)
/*
 * mono_gc_get_managed_allocator: decides whether the managed fast-path
 * allocator may be used for VTABLE's class and, if so, returns the allocator
 * of type 0. The checks below test for: missing TLS offsets, instances larger
 * than tlab_size, finalizable or marshal-by-ref classes, allocation
 * profiling, strings, and the collect_before_allocs debug option.
 * FOR_BOX is not referenced in the lines visible here.
 */
5119 mono_gc_get_managed_allocator (MonoVTable *vtable, gboolean for_box)
5121 int tlab_next_offset = -1;
5122 int tlab_temp_end_offset = -1;
5123 MonoClass *klass = vtable->klass;
5124 MONO_THREAD_VAR_OFFSET (tlab_next, tlab_next_offset);
5125 MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
5127 if (tlab_next_offset == -1 || tlab_temp_end_offset == -1)
5129 if (klass->instance_size > tlab_size)
5131 if (klass->has_finalize || klass->marshalbyref || (mono_profiler_get_events () & MONO_PROFILE_ALLOCATIONS))
5135 if (klass->byval_arg.type == MONO_TYPE_STRING)
5137 if (collect_before_allocs)
5140 return mono_gc_get_managed_allocator_by_type (0);
5144 mono_gc_get_managed_allocator_type (MonoMethod *managed_alloc)
/*
 * mono_gc_get_managed_allocator_by_type: looks up the allocator method for
 * ATYPE in alloc_method_cache, creating and caching it with
 * create_allocator () when needed. The cache access is performed between
 * mono_loader_lock () and mono_loader_unlock ().
 */
5150 mono_gc_get_managed_allocator_by_type (int atype)
5154 mono_loader_lock ();
5155 res = alloc_method_cache [atype];
5157 res = alloc_method_cache [atype] = create_allocator (atype);
5158 mono_loader_unlock ();
5162 #endif /* HAVE_SGEN_GC */