2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright (C) 2005-2006 Novell, Inc
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation provides always zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list is part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
131 #include <semaphore.h>
135 #include <sys/types.h>
136 #include <sys/stat.h>
137 #include <sys/mman.h>
138 #include <sys/time.h>
141 #include "metadata/metadata-internals.h"
142 #include "metadata/class-internals.h"
143 #include "metadata/gc-internal.h"
144 #include "metadata/object-internals.h"
145 #include "metadata/threads.h"
146 #include "metadata/sgen-gc.h"
147 #include "metadata/mono-gc.h"
148 #include "utils/mono-mmap.h"
152 * ######################################################################
153 * ######## Types and constants used by the GC.
154 * ######################################################################
156 #if SIZEOF_VOID_P == 4
157 typedef guint32 mword;
159 typedef guint64 mword;
162 static int gc_initialized = 0;
163 static int gc_debug_level = 0;
164 static FILE* gc_debug_file;
/*
 * mono_gc_flush_info:
 * Flush buffered GC debug output (written through the DEBUG macro to
 * gc_debug_file) so diagnostics reach the log before a potential crash.
 */
mono_gc_flush_info (void)
	fflush (gc_debug_file);
/* Compile-time cap on debug verbosity; gc_debug_level filters further at runtime. */
#define MAX_DEBUG_LEVEL 9
/* Execute statement `a` only when `level` passes both the static and runtime thresholds. */
#define DEBUG(level,a) do {if ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level) a;} while (0)

/* Simple wall-clock timing helpers based on gettimeofday (). */
#define TV_DECLARE(name) struct timeval name
#define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
/* Elapsed microseconds between two timevals. Every use of the macro
 * arguments is parenthesized so expressions (e.g. *ptr, a[i]) can be
 * passed safely; the previous version expanded bare `end.tv_usec`. */
#define TV_ELAPSED(start,end) ((int)((((end).tv_sec - (start).tv_sec) * 1000000) + (end).tv_usec - (start).tv_usec))
179 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
189 /* each request from the OS ends up in a GCMemSection */
190 typedef struct _GCMemSection GCMemSection;
191 struct _GCMemSection {
195 /* pointer where more data could be allocated if it fits */
199 * scan starts is an array of pointers to objects equally spaced in the allocation area
 * They let us quickly find pinned objects from pinning pointers.
203 /* in major collections indexes in the pin_queue for objects that pin this section */
206 unsigned short num_scan_start;
210 /* large object space struct: 64+ KB */
211 /* we could make this limit much smaller to avoid memcpy copy
212 * and potentially have more room in the GC descriptor: need to measure
213 * This also means that such small OS objects will need to be
214 * allocated in a different way (using pinned chunks).
215 * We may want to put large but smaller than 64k objects in the fixed space
216 * when we move the object from one generation to another (to limit the
217 * pig in the snake effect).
218 * Note: it may be worth to have an optimized copy function, since we can
219 * assume that objects are aligned and have a multiple of 8 size.
220 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
221 * true if MONO_ZERO_LEN_ARRAY is nonzero.
223 typedef struct _LOSObject LOSObject;
226 mword size; /* this is the object size */
227 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
229 char data [MONO_ZERO_LEN_ARRAY];
232 /* Pinned objects are allocated in the LOS space if bigger than half a page
233 * or from freelists otherwise. We assume that pinned objects are relatively few
234 * and they have a slow dying speed (like interned strings, thread objects).
235 * As such they will be collected only at major collections.
236 * free lists are not global: when we need memory we allocate a PinnedChunk.
 * Each pinned chunk is made of several pages, the first of which is used
 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
239 * includes the freelists vectors and info about the object size of each page
240 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
241 * a size is assigned to it, the page is divided in the proper chunks and each
242 * chunk is added to the freelist. To not waste space, the remaining space in the
243 * first page is used as objects of size 16 or 32 (need to measure which are more
245 * We use this same structure to allocate memory used internally by the GC, so
246 * we never use malloc/free if we need to alloc during collection: the world is stopped
247 * and malloc/free will deadlock.
248 * When we want to iterate over pinned objects, we just scan a page at a time
249 * linearly according to the size of objects in the page: the next pointer used to link
250 * the items in the freelist uses the same word as the vtable. Since we keep freelists
251 * for each pinned chunk, if the word points outside the pinned chunk it means
253 * We could avoid this expensive scanning in creative ways. We could have a policy
254 * of putting in the pinned space only objects we know about that have no struct fields
 * with references and we can easily use even an expensive write barrier for them,
256 * since pointer writes on such objects should be rare.
257 * The best compromise is to just alloc interned strings and System.MonoType in them.
258 * It would be nice to allocate MonoThread in it, too: must check that we properly
259 * use write barriers so we don't have to do any expensive scanning of the whole pinned
260 * chunk list during minor collections. We can avoid it now because we alloc in it only
261 * reference-free objects.
263 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
264 #define MAX_FREELIST_SIZE 2048
265 #define PINNED_PAGE_SIZE (4096)
266 #define PINNED_CHUNK_MIN_SIZE (4096*8)
267 typedef struct _PinnedChunk PinnedChunk;
268 struct _PinnedChunk {
271 int *page_sizes; /* a 0 means the page is still unused */
274 void *data [1]; /* page sizes and free lists are stored here */
278 * The young generation is divided into fragments. This is because
 * we can hand one fragment to a thread for lock-less fast alloc and
280 * because the young generation ends up fragmented anyway by pinned objects.
281 * Once a collection is done, a list of fragments is created. When doing
282 * thread local alloc we use smallish nurseries so we allow new threads to
283 * allocate memory from gen0 without triggering a collection. Threads that
284 * are found to allocate lots of memory are given bigger fragments. This
285 * should make the finalizer thread use little nursery memory after a while.
286 * We should start assigning threads very small fragments: if there are many
287 * threads the nursery will be full of reserved space that the threads may not
288 * use at all, slowing down allocation speed.
290 typedef struct _Fragment Fragment;
294 char *fragment_start;
295 char *fragment_limit; /* the current soft limit for allocation */
299 /* the runtime can register areas of memory as roots: we keep two lists of roots,
300 * a pinned root set for conservatively scanned roots and a normal one for
301 * precisely scanned roots (currently implemented as a single list).
303 typedef struct _RootRecord RootRecord;
311 /* for use with write barriers */
312 typedef struct _RememberedSet RememberedSet;
313 struct _RememberedSet {
317 mword data [MONO_ZERO_LEN_ARRAY];
320 /* we have 4 possible values in the low 2 bits */
322 REMSET_LOCATION, /* just a pointer to the exact location */
323 REMSET_RANGE, /* range of pointer fields */
324 REMSET_OBJECT, /* mark all the object for scanning */
325 REMSET_TYPE_MASK = 0x3
328 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
329 static RememberedSet *global_remset;
330 static int store_to_global_remset = 0;
332 /* FIXME: later choose a size that takes into account the RememberedSet struct
 * and doesn't waste any alloc padding space.
335 #define DEFAULT_REMSET_SIZE 1024
336 static RememberedSet* alloc_remset (int size, gpointer id);
338 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
339 * no cast from a pointer to an integer
346 /* these bits are set in the object vtable: we could merge them since an object can be
347 * either pinned or forwarded but not both.
348 * We store them in the vtable slot because the bits are used in the sync block for
 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
350 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
351 * would be an invalid combination for the monitor and hash code).
352 * The values are already shifted.
353 * The forwarding address is stored in the sync block.
355 #define FORWARDED_BIT 1
357 #define VTABLE_BITS_MASK 0x3
359 /* returns NULL if not forwarded, or the forwarded address */
360 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
361 /* set the forwarded address fw_addr for object obj */
362 #define forward_object(obj,fw_addr) do { \
363 ((mword*)(obj))[0] |= FORWARDED_BIT; \
364 ((mword*)(obj))[1] = (mword)(fw_addr); \
367 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
368 #define pin_object(obj) do { \
369 ((mword*)(obj))[0] |= PINNED_BIT; \
371 #define unpin_object(obj) do { \
372 ((mword*)(obj))[0] &= ~PINNED_BIT; \
377 * Since we set bits in the vtable, use the macro to load it from the pointer to
378 * an object that is potentially pinned.
380 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
/*
 * safe_name:
 * Return the class name of @obj. The vtable is read through LOAD_VTABLE
 * so the pin/forward tag bits stored in the vtable word are masked out,
 * making this usable on objects touched by an in-progress collection.
 */
safe_name (void* obj)
	MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
	return vt->klass->name;
/*
 * safe_object_get_size:
 * Return the allocated size in bytes of @o. The vtable is read through
 * LOAD_VTABLE so pin/forward tag bits are masked out; strings and arrays
 * are measured from their length fields, everything else from the class
 * instance size.
 */
safe_object_get_size (MonoObject* o)
	MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
	if (klass == mono_defaults.string_class) {
		/* 2 bytes per UTF-16 code unit plus a 2-byte terminator */
		return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
	} else if (klass->rank) {
		MonoArray *array = (MonoArray*)o;
		size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
		/* bounded arrays also carry one MonoArrayBounds per rank */
		size += sizeof (MonoArrayBounds) * klass->rank;
	/* from a created object: the class must be inited already */
	return klass->instance_size;
411 * ######################################################################
412 * ######## Global data.
413 * ######################################################################
415 static pthread_mutex_t gc_mutex = PTHREAD_MUTEX_INITIALIZER;
416 static int gc_disabled = 0;
417 static int num_minor_gcs = 0;
418 static int num_major_gcs = 0;
420 /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */
421 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
422 #define DEFAULT_NURSERY_SIZE (1024*512*2)
423 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
424 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
/* to quickly find the head of an object pinned by a conservative address
426 * we keep track of the objects allocated for each SCAN_START_SIZE memory
427 * chunk in the nursery or other memory sections. Larger values have less
428 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
430 #define SCAN_START_SIZE (4096*2)
431 /* the minimum size of a fragment that we consider useful for allocation */
432 #define FRAGMENT_MIN_SIZE (512)
433 /* This is a fixed value used for pinned chunks, not the system pagesize */
434 #define FREELIST_PAGESIZE 4096
436 static mword pagesize = 4096;
437 static mword nursery_size = DEFAULT_NURSERY_SIZE;
438 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
439 static mword max_section_size = DEFAULT_MAX_SECTION;
440 static int section_size_used = 0;
441 static int degraded_mode = 0;
443 static LOSObject *los_object_list = NULL;
444 static mword los_memory_usage = 0;
445 static mword los_num_objects = 0;
446 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
447 static mword total_alloc = 0;
448 /* use this to tune when to do a major/minor collection */
449 static mword memory_pressure = 0;
451 static GCMemSection *section_list = NULL;
452 static GCMemSection *nursery_section = NULL;
453 static mword lowest_heap_address = ~(mword)0;
454 static mword highest_heap_address = 0;
456 typedef struct _FinalizeEntry FinalizeEntry;
457 struct _FinalizeEntry {
460 void *data; /* can be a disappearing link or the data for the finalizer */
461 /* Note we could use just one pointer if we don't support multiple callbacks
462 * for finalizers and per-finalizer data and if we store the obj pointers
463 * in the link like libgc does
468 * The finalizable hash has the object as the key, the
469 * disappearing_link hash, has the link address as key.
471 static FinalizeEntry **finalizable_hash = NULL;
472 /* objects that are ready to be finalized */
473 static FinalizeEntry *fin_ready_list = NULL;
474 /* disappearing links use the same structure but a different list */
475 static FinalizeEntry **disappearing_link_hash = NULL;
476 static mword disappearing_link_hash_size = 0;
477 static mword finalizable_hash_size = 0;
479 static mword num_registered_finalizers = 0;
480 static mword num_ready_finalizers = 0;
481 static mword num_disappearing_links = 0;
482 static int no_finalize = 0;
484 /* keep each size a multiple of ALLOC_ALIGN */
485 /* on 64 bit systems 8 is likely completely unused. */
486 static const int freelist_sizes [] = {
487 8, 16, 24, 32, 40, 48, 64, 80,
488 96, 128, 160, 192, 224, 256, 320, 384,
489 448, 512, 584, 680, 816, 1024, 1360, 2048};
490 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
492 static char* max_pinned_chunk_addr = NULL;
493 static char* min_pinned_chunk_addr = (char*)-1;
494 /* pinned_chunk_list is used for allocations of objects that are never moved */
495 static PinnedChunk *pinned_chunk_list = NULL;
496 /* internal_chunk_list is used for allocating structures needed by the GC */
497 static PinnedChunk *internal_chunk_list = NULL;
/*
 * obj_is_from_pinned_alloc:
 * Return whether @p points into one of the chunks in pinned_chunk_list,
 * i.e. whether it was allocated as a pinned (never-moved) object.
 */
obj_is_from_pinned_alloc (char *p)
	PinnedChunk *chunk = pinned_chunk_list;
	for (; chunk; chunk = chunk->next) {
		/* chunk spans [start_data, chunk + num_pages * FREELIST_PAGESIZE) */
		if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
510 /* registered roots: the key to the hash is the root start address */
511 static RootRecord **roots_hash = NULL;
512 static int roots_hash_size = 0;
513 static mword roots_size = 0; /* amount of memory in the root set */
514 static mword num_roots_entries = 0;
517 * The current allocation cursors
518 * We allocate objects in the nursery.
519 * The nursery is the area between nursery_start and nursery_real_end.
520 * nursery_next is the pointer to the space where the next object will be allocated.
521 * nursery_temp_end is the pointer to the end of the temporary space reserved for
522 * the allocation: this allows us to allow allocations inside the fragments of the
523 * nursery (the empty holes between pinned objects) and it allows us to set the
524 * scan starts at reasonable intervals.
525 * nursery_next and nursery_temp_end will become per-thread vars to allow lock-free
527 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
528 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
529 * At the next allocation, the area of the nursery where objects can be present is
530 * between MIN(nursery_first_pinned_start, first_fragment_start) and
531 * MAX(nursery_last_pinned_end, nursery_temp_end)
533 static char *nursery_start = NULL;
534 static char *nursery_next = NULL;
535 static char *nursery_temp_end = NULL;
536 static char *nursery_real_end = NULL;
537 static char *nursery_frag_real_end = NULL;
538 static char *nursery_first_pinned_start = NULL;
539 static char *nursery_last_pinned_end = NULL;
541 /* fragments that are free and ready to be used for allocation */
542 static Fragment *nursery_fragments = NULL;
/* freelist of fragment structures */
544 static Fragment *fragment_freelist = NULL;
547 * used when moving the objects
548 * When the nursery is collected, objects are copied to to_space.
549 * The area between to_space and gray_objects is used as a stack
550 * of objects that need their fields checked for more references
552 * We should optimize somehow this mechanism to avoid rescanning
553 * ptr-free objects. The order is also probably not optimal: need to
554 * test cache misses and other graph traversal orders.
556 static char *to_space = NULL;
557 static char *gray_objects = NULL;
558 static char *to_space_end = NULL;
559 static GCMemSection *to_space_section = NULL;
561 /* objects bigger then this go into the large object space */
562 #define MAX_SMALL_OBJ_SIZE 0xffff
565 * ######################################################################
566 * ######## Macros and function declarations.
567 * ######################################################################
571 * Recursion is not allowed for the thread lock.
573 #define LOCK_GC pthread_mutex_lock (&gc_mutex)
574 #define UNLOCK_GC pthread_mutex_unlock (&gc_mutex)
/* Widen the global [lowest_heap_address, highest_heap_address] range to
 * cover [low, high); presumably used to cheaply reject pointers that cannot
 * point into the GC heap — verify at use sites. */
#define UPDATE_HEAP_BOUNDARIES(low,high) do {	\
		if ((mword)(low) < lowest_heap_address)	\
			lowest_heap_address = (mword)(low);	\
		if ((mword)(high) > highest_heap_address)	\
			highest_heap_address = (mword)(high);	\
/*
 * align_pointer:
 * Round @ptr up to the next sizeof (gpointer) boundary.
 */
align_pointer (void *ptr)
	mword p = (mword)ptr;
	p += sizeof (gpointer) - 1;
	p &= ~ (sizeof (gpointer) - 1);
592 /* forward declarations */
593 static void* get_internal_mem (size_t size);
594 static void free_internal_mem (void *addr);
595 static void* get_os_memory (size_t size, int activate);
596 static void free_os_memory (void *addr, size_t size);
597 static void report_internal_mem_usage (void);
599 static int stop_world (void);
600 static int restart_world (void);
601 static void pin_thread_data (void *start_nursery, void *end_nursery);
602 static void scan_from_remsets (void *start_nursery, void *end_nursery);
603 static void find_pinning_ref_from_thread (char *obj, size_t size);
604 static void update_current_thread_stack (void *start);
605 static GCMemSection* alloc_section (size_t size);
606 static void finalize_in_range (void **start, void **end);
607 static void null_link_in_range (void **start, void **end);
608 static gboolean search_fragment_for_size (size_t size);
609 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
610 static void clear_remsets (void);
611 static void sweep_pinned_objects (void);
612 static void free_large_object (LOSObject *obj);
613 static void free_mem_section (GCMemSection *section);
616 * ######################################################################
617 * ######## GC descriptors
618 * ######################################################################
619 * Used to quickly get the info the GC needs about an object: size and
620 * where the references are held.
622 /* objects are aligned to 8 bytes boundaries
623 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
624 * The low 3 bits define the type of the descriptor. The other bits
625 * depend on the type.
626 * As a general rule the 13 remaining low bits define the size, either
627 * of the whole object or of the elements in the arrays. While for objects
628 * the size is already in bytes, for arrays we need to shift, because
629 * array elements might be smaller than 8 bytes. In case of arrays, we
630 * use two bits to describe what the additional high bits represents,
631 * so the default behaviour can handle element sizes less than 2048 bytes.
632 * The high 16 bits, if 0 it means the object is pointer-free.
633 * This design should make it easy and fast to skip over ptr-free data.
634 * The first 4 types should cover >95% of the objects.
635 * Note that since the size of objects is limited to 64K, larger objects
636 * will be allocated in the large object heap.
637 * If we want 4-bytes alignment, we need to put vector and small bitmap
641 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
642 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
643 DESC_TYPE_STRING, /* nothing */
644 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
645 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
646 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
647 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
648 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
649 /* subtypes for arrays and vectors */
650 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
651 DESC_TYPE_V_REFS, /* all the array elements are refs */
652 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
653 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
656 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
657 #define LOW_TYPE_BITS 3
658 #define SMALL_BITMAP_SHIFT 16
659 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
660 #define VECTOR_INFO_SHIFT 14
661 #define VECTOR_ELSIZE_SHIFT 3
662 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
663 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
664 #define SMALL_SIZE_MASK 0xfff8
665 #define MAX_ELEMENT_SIZE 0x3ff
666 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
667 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
668 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
669 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
670 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
672 #define ALLOC_ALIGN 8
675 /* Root bitmap descriptors are simpler: the lower two bits describe the type
676 * and we either have 30/62 bitmap bits or nibble-based run-length,
677 * or a complex descriptor
680 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
683 ROOT_DESC_LARGE_BITMAP,
684 ROOT_DESC_TYPE_MASK = 0x3,
685 ROOT_DESC_TYPE_SHIFT = 2,
688 static gsize* complex_descriptors = NULL;
689 static int complex_descriptors_size = 0;
690 static int complex_descriptors_next = 0;
/*
 * alloc_complex_descriptor:
 * Store the reference bitmap @bitmap (@numbits bits) into the global
 * complex_descriptors table and return the index of the entry. Entries
 * are length-prefixed: word 0 holds the entry size in words, followed by
 * the bitmap words. An identical existing entry is reused instead of
 * appending a duplicate.
 */
alloc_complex_descriptor (gsize *bitmap, int numbits)
	/* one word for the length prefix plus the bitmap words (rounded up) */
	int nwords = numbits/GC_BITS_PER_WORD + 2;
	res = complex_descriptors_next;
	/* linear search, so we don't have duplicates with domain load/unload
	 * this should not be performance critical or we'd have bigger issues
	 * (the number and size of complex descriptors should be small).
	 */
	for (i = 0; i < complex_descriptors_next; ) {
		if (complex_descriptors [i] == nwords) {
			for (j = 0; j < nwords - 1; ++j) {
				if (complex_descriptors [i + 1 + j] != bitmap [j]) {
		/* advance to the next length-prefixed entry */
		i += complex_descriptors [i];
	/* no match found: grow the table if needed and append a new entry */
	if (complex_descriptors_next + nwords > complex_descriptors_size) {
		int new_size = complex_descriptors_size * 2 + nwords;
		complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
		complex_descriptors_size = new_size;
	DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
	complex_descriptors_next += nwords;
	complex_descriptors [res] = nwords;
	for (i = 0; i < nwords - 1; ++i) {
		complex_descriptors [res + 1 + i] = bitmap [i];
		DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
738 * Descriptor builders.
/*
 * mono_gc_make_descr_for_string:
 * Strings have a fixed, pointer-free layout, so their GC descriptor is
 * just the DESC_TYPE_STRING type tag.
 */
mono_gc_make_descr_for_string (void)
	return (void*) DESC_TYPE_STRING;
/*
 * mono_gc_make_descr_for_object:
 * Build the GC descriptor for an object type. @bitmap has one bit per
 * pointer-sized slot (@numbits of them) marking where references live;
 * @obj_size is the instance size. The size is rounded up to ALLOC_ALIGN
 * and the densest encoding that fits is chosen: run-length, inline
 * small/large bitmap, or an index into the complex_descriptors table.
 */
mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
	int first_set = -1, num_set = 0, last_set = -1, i;
	size_t stored_size = obj_size;
	/* round the stored size up to the allocation alignment */
	stored_size += ALLOC_ALIGN - 1;
	stored_size &= ~(ALLOC_ALIGN - 1);
	/* locate the first/last set bits and count the reference slots */
	for (i = 0; i < numbits; ++i) {
		if (bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
	if (stored_size <= MAX_SMALL_OBJ_SIZE) {
		/* check run-length encoding first: one byte offset, one byte number of pointers
		 * on 64 bit archs, we can have 3 runs, just one on 32.
		 * It may be better to use nibbles.
		 */
			desc = DESC_TYPE_RUN_LENGTH | stored_size;
			DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %d\n", (void*)desc, stored_size));
		} else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
			/* a single contiguous run of references: offset + count fit in one byte each */
			desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
			DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %d, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
		/* we know the 2-word header is ptr-free */
		if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
			desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
			DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set));
	/* we know the 2-word header is ptr-free */
	if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
		desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
		DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set));
	/* it's a complex object ... */
	desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
/*
 * mono_gc_make_descr_for_array:
 * Build the GC descriptor for an array (@vector nonzero selects the
 * vector type tag) whose element layout is given by @elem_bitmap/@numbits
 * and whose element size is @elem_size. If the array holds references,
 * numbits == 1 and the first bit is set in elem_bitmap. Picks the
 * cheapest element encoding: ptr-free, all-refs, inline bitmap, or an
 * out-of-line complex descriptor.
 */
mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
	int first_set = -1, num_set = 0, last_set = -1, i;
	mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
	/* locate the first/last set bits and count the reference slots */
	for (i = 0; i < numbits; ++i) {
		if (elem_bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
	if (elem_size <= MAX_ELEMENT_SIZE) {
		desc |= elem_size << VECTOR_ELSIZE_SHIFT;
			/* no references at all: the scanner can skip the payload */
			return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
		/* Note: we also handle structs with just ref fields */
		if (num_set * sizeof (gpointer) == elem_size) {
			/* every word of the element is a reference */
			return (void*)(desc | VECTOR_SUBTYPE_REFS | ((-1LL) << 16));
		/* FIXME: try run-len first */
		/* Note: we can't skip the object header here, because it's not present */
		if (last_set <= SMALL_BITMAP_SIZE) {
			return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
	/* it's an array of complex structs ... */
	desc = DESC_TYPE_COMPLEX_ARR;
	desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
829 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
/*
 * STRING_SIZE: compute the allocation size of a MonoString (2 bytes per char
 * plus the terminator), rounded up to ALLOC_ALIGN.
 */
830 #define STRING_SIZE(size,str) do { \
831 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
832 (size) += (ALLOC_ALIGN - 1); \
833 (size) &= ~(ALLOC_ALIGN - 1); \
/* OBJ_RUN_LEN_SIZE: the object size is stored directly in the low bits of the descriptor */
836 #define OBJ_RUN_LEN_SIZE(size,vt,obj) do { \
837 (size) = (vt)->desc & 0xfff8; \
/* OBJ_BITMAP_SIZE: same inline-size encoding as the run-length descriptor */
840 #define OBJ_BITMAP_SIZE(size,vt,obj) do { \
841 (size) = (vt)->desc & 0xfff8; \
844 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
/* prefetching disabled: the macro expands to nothing */
845 #define PREFETCH(addr)
847 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
/*
 * OBJ_RUN_LEN_FOREACH_PTR: visit each reference slot of an object described by
 * a run-length descriptor (offset in bits 16-23, count in bits 24-31).
 */
848 #define OBJ_RUN_LEN_FOREACH_PTR(vt,obj) do { \
849 if ((vt)->desc & 0xffff0000) { \
850 /* there are pointers */ \
851 void **_objptr_end; \
852 void **_objptr = (void**)(obj); \
853 _objptr += ((vt)->desc >> 16) & 0xff; \
854 _objptr_end = _objptr + (((vt)->desc >> 24) & 0xff); \
855 while (_objptr < _objptr_end) { \
856 HANDLE_PTR (_objptr, (obj)); \
862 /* a bitmap desc means that there are pointer references or we'd have
863 * chosen run-length, instead: add an assert to check.
/*
 * OBJ_BITMAP_FOREACH_PTR: visit reference slots flagged in the small bitmap
 * stored in the high 16 bits of the descriptor; the 2-word header is skipped.
 */
865 #define OBJ_BITMAP_FOREACH_PTR(vt,obj) do { \
866 /* there are pointers */ \
867 void **_objptr = (void**)(obj); \
868 gsize _bmap = (vt)->desc >> 16; \
869 _objptr += OBJECT_HEADER_WORDS; \
872 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_LARGE_BITMAP_FOREACH_PTR: like OBJ_BITMAP_FOREACH_PTR, but the bitmap
 * occupies all descriptor bits above LOW_TYPE_BITS (size is not inline).
 */
879 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
880 /* there are pointers */ \
881 void **_objptr = (void**)(obj); \
882 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
883 _objptr += OBJECT_HEADER_WORDS; \
886 HANDLE_PTR (_objptr, (obj)); \
/*
 * OBJ_COMPLEX_FOREACH_PTR: the descriptor is an index into complex_descriptors;
 * the first word there is the bitmap word count, followed by the bitmap words.
 */
893 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
894 /* there are pointers */ \
895 void **_objptr = (void**)(obj); \
896 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
897 int bwords = (*bitmap_data) - 1; \
898 void **start_run = _objptr; \
901 MonoObject *myobj = (MonoObject*)obj; \
902 g_print ("found %d at %p (0x%x): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
904 while (bwords-- > 0) { \
905 gsize _bmap = *bitmap_data++; \
906 _objptr = start_run; \
907 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
910 HANDLE_PTR (_objptr, (obj)); \
/* advance to the slots covered by the next bitmap word */ \
915 start_run += GC_BITS_PER_WORD; \
919 /* this one is untested */
/*
 * OBJ_COMPLEX_ARR_FOREACH_PTR: apply a complex (multi-word bitmap) element
 * descriptor to every element of an array of structs.
 */
920 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
921 /* there are pointers */ \
922 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
923 int mbwords = (*mbitmap_data++) - 1; \
924 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
925 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
926 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
928 MonoObject *myobj = (MonoObject*)start; \
929 g_print ("found %d at %p (0x%x): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
931 while (e_start < e_end) { \
932 void **_objptr = (void**)e_start; \
933 gsize *bitmap_data = mbitmap_data; \
934 unsigned int bwords = mbwords; \
935 while (bwords-- > 0) { \
936 gsize _bmap = *bitmap_data++; \
937 void **start_run = _objptr; \
938 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
941 HANDLE_PTR (_objptr, (obj)); \
946 _objptr = start_run + GC_BITS_PER_WORD; \
948 e_start += el_size; \
/*
 * OBJ_VECTOR_FOREACH_PTR: dispatch on the vector subtype (refs-only, run-length
 * per element, or per-element bitmap) and visit every reference slot.
 */
952 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
953 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
954 if ((vt)->desc & 0xffffc000) { \
955 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
956 /* there are pointers */ \
957 int etype = (vt)->desc & 0xc000; \
958 if (etype == (DESC_TYPE_V_REFS << 14)) { \
959 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
960 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
961 /* Note: this code can handle also arrays of struct with only references in them */ \
962 while (p < end_refs) { \
963 HANDLE_PTR (p, (obj)); \
966 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
967 int offset = ((vt)->desc >> 16) & 0xff; \
968 int num_refs = ((vt)->desc >> 24) & 0xff; \
969 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
970 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
971 while (e_start < e_end) { \
972 void **p = (void**)e_start; \
975 for (i = 0; i < num_refs; ++i) { \
976 HANDLE_PTR (p + i, (obj)); \
978 e_start += el_size; \
980 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
981 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
982 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
983 while (e_start < e_end) { \
984 void **p = (void**)e_start; \
985 gsize _bmap = (vt)->desc >> 16; \
986 /* Note: there is no object header here to skip */ \
989 HANDLE_PTR (p, (obj)); \
994 e_start += el_size; \
/* statistics gathered by the checking HANDLE_PTR below */
1000 static mword new_obj_references = 0;
1001 static mword obj_references_checked = 0;
/*
 * Checking HANDLE_PTR: counts references that point into the nursery
 * (no copying here; used by scan_area for consistency/statistics checks).
 */
1004 #define HANDLE_PTR(ptr,obj) do { \
1005 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1006 new_obj_references++; \
1007 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1009 obj_references_checked++; \
1014 * ######################################################################
1015 * ######## Detecting and removing garbage.
1016 * ######################################################################
1017 * This section of code deals with detecting the objects no longer in use
1018 * and reclaiming the memory.
/*
 * Walk every object between start and end, dispatching on the low 3 descriptor
 * bits to compute each object's size and count nursery references via the
 * checking HANDLE_PTR. Zeroed holes are skipped a pointer-word at a time.
 * Debug/statistics only; does not move objects.
 */
1020 static void __attribute__((noinline))
1021 scan_area (char *start, char *end)
1026 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1027 new_obj_references = 0;
1028 obj_references_checked = 0;
1029 while (start < end) {
1030 if (!*(void**)start) {
1031 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1034 vt = (GCVTable*)LOAD_VTABLE (start);
1035 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1037 MonoObject *obj = (MonoObject*)start;
1038 g_print ("found at %p (0x%x): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1040 type = vt->desc & 0x7;
1041 if (type == DESC_TYPE_STRING) {
1042 STRING_SIZE (skip_size, start);
1046 } else if (type == DESC_TYPE_RUN_LENGTH) {
1047 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1048 g_assert (skip_size);
1049 OBJ_RUN_LEN_FOREACH_PTR (vt,start);
1053 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1054 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1055 skip_size *= mono_array_length ((MonoArray*)start);
1056 skip_size += sizeof (MonoArray);
1057 skip_size += (ALLOC_ALIGN - 1);
1058 skip_size &= ~(ALLOC_ALIGN - 1);
1059 OBJ_VECTOR_FOREACH_PTR (vt, start);
1060 if (type == DESC_TYPE_ARRAY) {
1061 /* account for the bounds */
1066 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1067 OBJ_BITMAP_SIZE (skip_size, vt, start);
1068 g_assert (skip_size);
1069 OBJ_BITMAP_FOREACH_PTR (vt,start);
1073 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1074 skip_size = safe_object_get_size ((MonoObject*)start);
1075 skip_size += (ALLOC_ALIGN - 1);
1076 skip_size &= ~(ALLOC_ALIGN - 1);
1077 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1081 } else if (type == DESC_TYPE_COMPLEX) {
1082 /* this is a complex object */
1083 skip_size = safe_object_get_size ((MonoObject*)start);
1084 skip_size += (ALLOC_ALIGN - 1);
1085 skip_size &= ~(ALLOC_ALIGN - 1);
1086 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1090 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1091 /* this is an array of complex structs */
1092 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1093 skip_size *= mono_array_length ((MonoArray*)start);
1094 skip_size += sizeof (MonoArray);
1095 skip_size += (ALLOC_ALIGN - 1);
1096 skip_size &= ~(ALLOC_ALIGN - 1);
1097 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1098 if (type == DESC_TYPE_ARRAY) {
1099 /* account for the bounds */
1108 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1109 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1110 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
/*
 * Walk every object between start and end; objects that belong to the given
 * (unloading) domain are memset to zero so later scans see them as holes.
 * Thread objects are excluded. Uses the same descriptor dispatch as scan_area
 * to compute each object's size.
 */
1113 static void __attribute__((noinline))
1114 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1119 while (start < end) {
1120 if (!*(void**)start) {
1121 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1124 vt = (GCVTable*)LOAD_VTABLE (start);
1125 /* handle threads someway (maybe insert the root domain vtable?) */
1126 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1127 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1132 type = vt->desc & 0x7;
1133 if (type == DESC_TYPE_STRING) {
1134 STRING_SIZE (skip_size, start);
1135 if (remove) memset (start, 0, skip_size);
1138 } else if (type == DESC_TYPE_RUN_LENGTH) {
1139 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1140 g_assert (skip_size);
1141 if (remove) memset (start, 0, skip_size);
1144 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1145 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1146 skip_size *= mono_array_length ((MonoArray*)start);
1147 skip_size += sizeof (MonoArray);
1148 skip_size += (ALLOC_ALIGN - 1);
1149 skip_size &= ~(ALLOC_ALIGN - 1);
1150 if (type == DESC_TYPE_ARRAY) {
1151 /* account for the bounds */
1153 if (remove) memset (start, 0, skip_size);
1156 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1157 OBJ_BITMAP_SIZE (skip_size, vt, start);
1158 g_assert (skip_size);
1159 if (remove) memset (start, 0, skip_size);
1162 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1163 skip_size = safe_object_get_size ((MonoObject*)start);
1164 skip_size += (ALLOC_ALIGN - 1);
1165 skip_size &= ~(ALLOC_ALIGN - 1);
1166 if (remove) memset (start, 0, skip_size);
1169 } else if (type == DESC_TYPE_COMPLEX) {
1170 /* this is a complex object */
1171 skip_size = safe_object_get_size ((MonoObject*)start);
1172 skip_size += (ALLOC_ALIGN - 1);
1173 skip_size &= ~(ALLOC_ALIGN - 1);
1174 if (remove) memset (start, 0, skip_size);
1177 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1178 /* this is an array of complex structs */
1179 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1180 skip_size *= mono_array_length ((MonoArray*)start);
1181 skip_size += sizeof (MonoArray);
1182 skip_size += (ALLOC_ALIGN - 1);
1183 skip_size &= ~(ALLOC_ALIGN - 1);
1184 if (type == DESC_TYPE_ARRAY) {
1185 /* account for the bounds */
1187 if (remove) memset (start, 0, skip_size);
1197 * When appdomains are unloaded we can easily remove objects that have finalizers,
1198 * but all the others could still be present in random places on the heap.
1199 * We need a sweep to get rid of them even though it's going to be costly
1201 * The reason we need to remove them is because we access the vtable and class
1202 * structures to know the object size and the reference bitmap: once the domain is
1203 * unloaded they point to random memory.
/* Sweep every heap section, zeroing objects owned by the unloading domain. */
1206 mono_gc_clear_domain (MonoDomain * domain)
1208 GCMemSection *section;
1210 for (section = section_list; section; section = section->next) {
1211 scan_area_for_domain (domain, section->data, section->end_data);
1213 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
/*
 * Record ptr in the global remembered set; when the current set is full,
 * a new one is allocated and chained in front of the old.
 */
1218 add_to_global_remset (gpointer ptr)
1221 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
1222 if (global_remset->store_next < global_remset->end_set) {
1223 *(global_remset->store_next++) = (mword)ptr;
/* current set is full: allocate a same-sized one and chain the old behind it */
1226 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1227 rs->next = global_remset;
1229 *(global_remset->store_next++) = (mword)ptr;
1233 * This is how the copying happens from the nursery to the old generation.
1234 * We assume that at this time all the pinned objects have been identified and
1236 * We run scan_object() for each pinned object so that each referenced
1237 * objects if possible are copied. The new gray objects created can have
1238 * scan_object() run on them right away, too.
1239 * Then we run copy_object() for the precisely tracked roots. At this point
1240 * all the roots are either gray or black. We run scan_object() on the gray
1241 * objects until no more gray objects are created.
1242 * At the end of the process we walk again the pinned list and we unmark
1243 * the pinned flag. As we go we also create the list of free space for use
1244 * in the next allocation runs.
1246 * We need to remember objects from the old generation that point to the new one
1247 * (or just addresses?).
1249 * copy_object could be made into a macro once debugged (use inline for now).
/*
 * Copy obj out of [from_space_start, from_space_end) into the gray area,
 * leaving a forwarding pointer behind. Already-forwarded, pinned, and
 * LOS/pinned-chunk objects are not moved. Objects outside the from-space
 * are returned unchanged.
 */
1252 static char* __attribute__((noinline))
1253 copy_object (char *obj, char *from_space_start, char *from_space_end)
1255 if (obj >= from_space_start && obj < from_space_end) {
1259 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
1260 if ((forwarded = object_is_forwarded (obj))) {
1261 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1262 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
1265 if (object_is_pinned (obj)) {
1266 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1267 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1270 objsize = safe_object_get_size ((MonoObject*)obj);
1271 objsize += ALLOC_ALIGN - 1;
1272 objsize &= ~(ALLOC_ALIGN - 1);
1273 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %d)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1274 /* FIXME: handle pinned allocs:
1275 * Large objects are simple, at least until we always follow the rule:
1276 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1277 * At the end of major collections, we walk the los list and if
1278 * the object is pinned, it is marked, otherwise it can be freed.
1280 if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) {
1281 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %d\n", obj, safe_name (obj), objsize));
1285 /* ok, the object is not pinned, we can move it */
1286 /* use a optimized memcpy here */
/* x86 inline rep-movs copy path; assumes objsize is a multiple of 4 */
1291 char* edi = gray_objects;
1292 __asm__ __volatile__(
1294 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1295 : "0" (objsize/4), "1" (edi),"2" (esi)
/* portable fallback copy */
1300 memcpy (gray_objects, obj, objsize);
1302 /* adjust array->bounds */
1303 vt = ((MonoObject*)obj)->vtable;
1304 g_assert (vt->gc_descr);
1305 if (vt->rank && ((MonoArray*)obj)->bounds) {
1306 MonoArray *array = (MonoArray*)gray_objects;
1307 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1308 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %d, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1310 /* set the forwarding pointer */
1311 forward_object (obj, gray_objects);
/* NOTE(review): scan_starts is indexed with the OLD address obj, not the copy — verify intent */
1313 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
1314 gray_objects += objsize;
1315 DEBUG (8, g_assert (gray_objects <= to_space_end));
/*
 * Copying HANDLE_PTR: forward the referenced object via copy_object and, if
 * the slot still points into the from-space (pinned/LOS object), record the
 * slot in the global remembered set.
 */
1322 #define HANDLE_PTR(ptr,obj) do { \
1324 void *__old = *(ptr); \
1325 *(ptr) = copy_object (*(ptr), from_start, from_end); \
1326 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1327 if (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) \
1328 add_to_global_remset ((ptr)); \
1333 * Scan the object pointed to by @start for references to
1334 * other objects between @from_start and @from_end and copy
1335 * them to the gray_objects area.
1336 * Returns a pointer to the end of the object.
1339 scan_object (char *start, char* from_start, char* from_end)
1344 vt = (GCVTable*)LOAD_VTABLE (start);
1345 //type = vt->desc & 0x7;
1347 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1348 switch (vt->desc & 0x7) {
1349 //if (type == DESC_TYPE_STRING) {
1350 case DESC_TYPE_STRING:
/* strings hold no references: just skip over them */
1351 STRING_SIZE (skip_size, start);
1352 return start + skip_size;
1353 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1354 case DESC_TYPE_RUN_LENGTH:
1355 OBJ_RUN_LEN_FOREACH_PTR (vt,start);
1356 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1357 g_assert (skip_size);
1358 return start + skip_size;
1359 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1360 case DESC_TYPE_ARRAY:
1361 case DESC_TYPE_VECTOR:
1362 OBJ_VECTOR_FOREACH_PTR (vt, start);
1363 skip_size = safe_object_get_size ((MonoObject*)start);
/* alternative inline size computation from the descriptor */
1365 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1366 skip_size *= mono_array_length ((MonoArray*)start);
1367 skip_size += sizeof (MonoArray);
1369 skip_size += (ALLOC_ALIGN - 1);
1370 skip_size &= ~(ALLOC_ALIGN - 1);
1371 return start + skip_size;
1372 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1373 case DESC_TYPE_SMALL_BITMAP:
1374 OBJ_BITMAP_FOREACH_PTR (vt,start);
1375 OBJ_BITMAP_SIZE (skip_size, vt, start);
1376 return start + skip_size;
1377 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1378 case DESC_TYPE_LARGE_BITMAP:
1379 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1380 skip_size = safe_object_get_size ((MonoObject*)start);
1381 skip_size += (ALLOC_ALIGN - 1);
1382 skip_size &= ~(ALLOC_ALIGN - 1);
1383 return start + skip_size;
1384 //} else if (type == DESC_TYPE_COMPLEX) {
1385 case DESC_TYPE_COMPLEX:
1386 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1387 /* this is a complex object */
1388 skip_size = safe_object_get_size ((MonoObject*)start);
1389 skip_size += (ALLOC_ALIGN - 1);
1390 skip_size &= ~(ALLOC_ALIGN - 1);
1391 return start + skip_size;
1392 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1393 case DESC_TYPE_COMPLEX_ARR:
1394 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1395 /* this is an array of complex structs */
1396 skip_size = safe_object_get_size ((MonoObject*)start);
1398 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1399 skip_size *= mono_array_length ((MonoArray*)start);
1400 skip_size += sizeof (MonoArray);
1402 skip_size += (ALLOC_ALIGN - 1);
1403 skip_size &= ~(ALLOC_ALIGN - 1);
1404 return start + skip_size;
/* unknown descriptor type: corrupted heap or descriptor */
1406 g_assert_not_reached ();
1411 * Addresses from start to end are already sorted. This function finds the object header
1412 * for each address and pins the object. The addresses must be inside the passed section.
1413 * Return the number of pinned objects.
1416 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1421 void *last_obj = NULL;
1422 size_t last_obj_size = 0;
1425 void **definitely_pinned = start;
1426 while (start < end) {
1428 /* the range check should be redundant */
1429 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1430 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1431 /* multiple pointers to the same object */
1432 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
/* use scan_starts to find a nearby known object start before addr */
1436 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1437 search_start = (void*)section->scan_starts [idx];
1438 if (!search_start || search_start > addr) {
1441 search_start = section->scan_starts [idx];
1442 if (search_start && search_start <= addr)
1445 if (!search_start || search_start > addr)
1446 search_start = start_nursery;
1448 if (search_start < last_obj)
1449 search_start = (char*)last_obj + last_obj_size;
1450 /* now addr should be in an object a short distance from search_start
1451 * Note that search_start must point to zeroed mem or point to an object.
/* skip zeroed holes one aligned pointer-word at a time */
1454 if (!*(void**)search_start) {
1455 mword p = (mword)search_start;
1456 p += sizeof (gpointer);
1457 p += ALLOC_ALIGN - 1;
1458 p &= ~(ALLOC_ALIGN - 1);
1459 search_start = (void*)p;
1462 last_obj = search_start;
1463 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1464 last_obj_size += ALLOC_ALIGN - 1;
1465 last_obj_size &= ~(ALLOC_ALIGN - 1);
1466 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %d\n", last_obj, safe_name (last_obj), last_obj_size));
1467 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1468 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1469 pin_object (search_start);
/* compact the queue in place: keep only addresses that actually pinned */
1470 definitely_pinned [count] = search_start;
1474 /* skip to the next object */
1475 search_start = (void*)((char*)search_start + last_obj_size);
1476 } while (search_start <= addr);
1477 /* we either pinned the correct object or we ignored the addr because
1478 * it points to unused zeroed memory.
1484 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1488 static void** pin_queue;
1489 static int pin_queue_size = 0;
1490 static int next_pin_slot = 0;
1495 gap = (gap * 10) / 13;
1496 if (gap == 9 || gap == 10)
/*
 * qsort comparator ordering pointers by address.
 * NOTE(review): the pointer difference is narrowed to the comparator's int
 * return on LP64 targets, which can misorder far-apart addresses — confirm
 * (only used by the commented-out qsort path in sort_addresses).
 */
1505 compare_addr (const void *a, const void *b)
1507 return *(const void **)a - *(const void **)b;
1511 /* sort the addresses in array in increasing order */
/* Comb-sort implementation; measured faster than qsort for this workload. */
1513 sort_addresses (void **array, int size)
1516 * qsort is slower as predicted.
1517 * qsort (array, size, sizeof (gpointer), compare_addr);
/* shrink the gap each pass until it reaches 1 and no swaps occur */
1524 gap = new_gap (gap);
1527 for (i = 0; i < end; i++) {
1529 if (array [i] > array [j]) {
1530 void* val = array [i];
1531 array [i] = array [j];
1536 if (gap == 1 && !swapped)
/* Debug helper: print the free ranges between consecutive pinned addresses. */
1542 print_nursery_gaps (void* start_nursery, void *end_nursery)
1545 gpointer first = start_nursery;
1547 for (i = 0; i < next_pin_slot; ++i) {
1548 next = pin_queue [i];
1549 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, (char*)next-(char*)first);
/* final range from the last pinned address to the end of the nursery */
1553 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, (char*)next-(char*)first);
1556 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
1558 optimize_pin_queue (int start_slot)
1560 void **start, **cur, **end;
1561 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1562 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1563 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1564 if ((next_pin_slot - start_slot) > 1)
1565 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1566 start = cur = pin_queue + start_slot;
1567 end = pin_queue + next_pin_slot;
/* copy-compact: skip over runs of equal (duplicate) addresses */
1570 while (*start == *cur && cur < end)
1574 next_pin_slot = start - pin_queue;
1575 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1576 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
/*
 * Grow the pin queue by 50% (initial capacity 1024), preserving the
 * currently queued entries.
 * NOTE(review): get_internal_mem's OOM behavior isn't visible here — confirm
 * it aborts or always succeeds before relying on new_pin being non-NULL.
 */
1581 realloc_pin_queue (void)
1583 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1584 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1585 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1586 free_internal_mem (pin_queue);
1587 pin_queue = new_pin;
1588 pin_queue_size = new_size;
1589 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1593 * Scan the memory between start and end and queue values which could be pointers
1594 * to the area between start_nursery and end_nursery for later consideration.
1595 * Typically used for thread stacks.
1598 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1601 while (start < end) {
1602 if (*start >= start_nursery && *start < end_nursery) {
1604 * *start can point to the middle of an object
1605 * note: should we handle pointing at the end of an object?
1606 * pinning in C# code disallows pointing at the end of an object
1607 * but there is some small chance that an optimizing C compiler
1608 * may keep the only reference to an object by pointing
1609 * at the end of it. We ignore this small chance for now.
1610 * Pointers to the end of an object are indistinguishable
1611 * from pointers to the start of the next object in memory
1612 * so if we allow that we'd need to pin two objects...
1613 * We queue the pointer in an array, the
1614 * array will then be sorted and uniqued. This way
1615 * we can coalesce several pinning pointers and it should
1616 * be faster since we'd do a memory scan with increasing
1617 * addresses. Note: we can align the address to the allocation
1618 * alignment, so the unique process is more effective.
/* align down so duplicates collapse during the later sort/uniq pass */
1620 mword addr = (mword)*start;
1621 addr &= ~(ALLOC_ALIGN - 1);
1622 if (next_pin_slot >= pin_queue_size)
1623 realloc_pin_queue ();
1624 pin_queue [next_pin_slot++] = (void*)addr;
1625 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1630 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1634 * If generation is 0, just mark objects in the nursery, the others we don't care,
1635 * since they are not going to move anyway.
1636 * There are different areas that are scanned for pinned pointers:
1637 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1638 * *) the pinned handle table
1639 * *) the pinned roots
1641 * Note: when we'll use a write barrier for old to new gen references, we need to
1642 * keep track of old gen objects that point to pinned new gen objects because in that
1643 * case the referenced object will be moved maybe at the next collection, but there
1644 * is no write in the old generation area where the pinned object is referenced
1645 * and we may not consider it as reachable.
1648 mark_pinned_objects (int generation)
1653 * Debugging function: find in the conservative roots where @obj is being pinned.
1656 find_pinning_reference (char *obj, size_t size)
1660 char *endobj = obj + size;
1661 for (i = 0; i < roots_hash_size; ++i) {
1662 for (root = roots_hash [i]; root; root = root->next) {
1663 /* if desc is non-null it has precise info */
1664 if (!root->root_desc) {
/* conservative root: scan every slot for a pointer into [obj, endobj) */
1665 char ** start = (char**)root->start_root;
1666 while (start < (char**)root->end_root) {
1667 if (*start >= obj && *start < endobj) {
1668 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
/* also look through the thread stacks/registers */
1675 find_pinning_ref_from_thread (obj, size);
1679 * The first thing we do in a collection is to identify pinned objects.
1680 * This function considers all the areas of memory that need to be
1681 * conservatively scanned.
1684 pin_from_roots (void *start_nursery, void *end_nursery)
1688 DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries));
1689 /* objects pinned from the API are inside these roots */
1690 for (i = 0; i < roots_hash_size; ++i) {
1691 for (root = roots_hash [i]; root; root = root->next) {
1692 /* if desc is non-null it has precise info */
1693 if (root->root_desc)
1695 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1696 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1699 /* now deal with the thread stacks
1700 * in the future we should be able to conservatively scan only:
1701 * *) the cpu registers
1702 * *) the unmanaged stack frames
1703 * *) the _last_ managed stack frame
1704 * *) pointers slots in managed frames
1706 pin_thread_data (start_nursery, end_nursery);
1710 * The memory area from start_root to end_root contains pointers to objects.
1711 * Their position is precisely described by @desc (this means that the pointer
1712 * can be either NULL or the pointer to the start of an object).
1713 * This functions copies them to to_space updates them.
1716 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1718 switch (desc & ROOT_DESC_TYPE_MASK) {
1719 case ROOT_DESC_BITMAP:
/* one bit per pointer-sized root slot: set bit => live reference to update */
1720 desc >>= ROOT_DESC_TYPE_SHIFT;
1722 if ((desc & 1) && *start_root) {
1723 *start_root = copy_object (*start_root, n_start, n_end);
1724 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \
/* remaining root descriptor encodings are not implemented yet */
1730 case ROOT_DESC_RUN_LEN:
1731 case ROOT_DESC_LARGE_BITMAP:
1732 g_assert_not_reached ();
/* Pop a Fragment from the freelist, or allocate a fresh one when empty. */
1737 alloc_fragment (void)
1739 Fragment *frag = fragment_freelist;
1741 fragment_freelist = frag->next;
1745 frag = get_internal_mem (sizeof (Fragment));
1751 * Allocate and setup the data structures needed to be able to allocate objects
1752 * in the nursery. The nursery is stored in nursery_section.
1755 alloc_nursery (void)
1757 GCMemSection *section;
/* idempotent: a nursery is only ever created once */
1762 if (nursery_section)
1764 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %d\n", nursery_size));
1765 /* later we will alloc a larger area for the nursery but only activate
1766 * what we need. The rest will be used as expansion if we have too many pinned
1767 * objects in the existing nursery.
1769 /* FIXME: handle OOM */
1770 section = get_internal_mem (sizeof (GCMemSection));
1771 data = get_os_memory (nursery_size, TRUE);
1772 nursery_start = nursery_next = data;
1773 nursery_real_end = data + nursery_size;
1774 nursery_temp_end = data + SCAN_START_SIZE;
1775 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
1776 total_alloc += nursery_size;
1777 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", nursery_size, total_alloc));
1778 section->data = section->next_data = data;
1779 section->size = nursery_size;
1780 section->end_data = nursery_real_end;
/* scan_starts records one known object start per SCAN_START_SIZE chunk */
1781 scan_starts = nursery_size / SCAN_START_SIZE;
1782 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
1783 section->num_scan_start = scan_starts;
1785 /* add to the section list */
1786 section->next = section_list;
1787 section_list = section;
1789 nursery_section = section;
1791 /* Setup the single first large fragment */
1792 frag = alloc_fragment ();
1793 frag->fragment_start = nursery_start;
1794 frag->fragment_limit = nursery_start;
1795 frag->fragment_end = nursery_real_end;
1796 nursery_frag_real_end = nursery_real_end;
1797 /* FIXME: frag here is lost */
1801 * Update roots in the old generation. Since we currently don't have the
1802 * info from the write barriers, we just scan all the objects.
1805 scan_old_generation (char *start, char* end)
1807 GCMemSection *section;
1809 LOSObject *big_object;
/* walk every non-nursery section, scanning each object precisely */
1812 for (section = section_list; section; section = section->next) {
1813 if (section == nursery_section)
1815 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
1816 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
1818 while (p < section->next_data) {
1823 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
1824 p = scan_object (p, start, end);
1827 /* scan the old object space, too */
1828 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1829 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %d\n", big_object->data, safe_name (big_object->data), big_object->size));
1830 scan_object (big_object->data, start, end);
1832 /* scan the list of objects ready for finalization */
1833 for (fin = fin_ready_list; fin; fin = fin->next) {
1834 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
1835 fin->object = copy_object (fin->object, start, end);
/* running total of free-fragment bytes created during the last collection */
1839 static mword fragment_total = 0;
/*
 * Zero a free hole found between pinned objects in the nursery and, if it is
 * at least FRAGMENT_MIN_SIZE, push it on the nursery_fragments list so the
 * allocator can reuse it (allocation requires pre-zeroed memory — see header).
 * NOTE(review): listing excerpt; elided lines (e.g. the Fragment local
 * declaration and opening brace) are not visible here.
 */
1841 * We found a fragment of free memory in the nursery: memzero it and if
1842 * it is big enough, add it to the list of fragments that can be used for
1846 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
1849 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %d\n", frag_start, frag_end, frag_size));
1850 /* memsetting just the first chunk start is bound to provide better cache locality */
1851 memset (frag_start, 0, frag_size);
1852 /* Not worth dealing with smaller fragments: need to tune */
1853 if (frag_size >= FRAGMENT_MIN_SIZE) {
1854 fragment = alloc_fragment ();
1855 fragment->fragment_start = frag_start;
1856 fragment->fragment_limit = frag_start;
1857 fragment->fragment_end = frag_end;
/* prepend to the singly-linked fragment list */
1858 fragment->next = nursery_fragments;
1859 nursery_fragments = fragment;
1860 fragment_total += frag_size;
/*
 * Iteratively scan the gray area (objects already copied to to_space but not
 * yet scanned) until no new gray objects are produced, interleaving
 * finalization-queue processing and, last, disappearing-link handling.
 * NOTE(review): listing excerpt — braces/locals elided where numbering jumps.
 */
1865 drain_gray_stack (char *start_addr, char *end_addr)
1873 * We copied all the reachable objects. Now it's the time to copy
1874 * the objects that were not referenced by the roots, but by the copied objects.
1875 * we built a stack of objects pointed to by gray_start: they are
1876 * additional roots and we may add more items as we go.
1877 * We loop until gray_start == gray_objects which means no more objects have
1878 * been added. Note this is iterative: no recursion is involved.
1879 * We need to walk the LO list as well in search of marked big objects
1880 * (use a flag since this is needed only on major collections). We need to loop
1881 * here as well, so keep a counter of marked LO (increasing it in copy_object).
1884 gray_start = to_space;
1885 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
/* gray_objects is the allocation frontier; scanning may push it further */
1886 while (gray_start < gray_objects) {
1887 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1888 gray_start = scan_object (gray_start, start_addr, end_addr);
1891 DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv)));
1892 //scan_old_generation (start_addr, end_addr);
1893 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
1894 /* walk the finalization queue and move also the objects that need to be
1895 * finalized: use the finalized objects as new roots so the objects they depend
1896 * on are also not reclaimed. As with the roots above, only objects in the nursery
1897 * are marked/copied.
1898 * We need a loop here, since objects ready for finalizers may reference other objects
1899 * that are fin-ready. Speedup with a flag?
/* loop until finalize_in_range stops discovering new fin-ready objects */
1902 fin_ready = num_ready_finalizers;
1903 finalize_in_range ((void**)start_addr, (void**)end_addr);
1905 /* drain the new stack that might have been created */
1906 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
1907 while (gray_start < gray_objects) {
1908 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1909 gray_start = scan_object (gray_start, start_addr, end_addr);
1911 } while (fin_ready != num_ready_finalizers);
1913 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
/* record the new allocation frontier of the to-space section */
1914 to_space = gray_start;
1915 to_space_section->next_data = to_space;
1918 * handle disappearing links
1919 * Note we do this after checking the finalization queue because if an object
1920 * survives (at least long enough to be finalized) we don't clear the link.
1921 * This also deals with a possible issue with the monitor reclamation: with the Boehm
1922 * GC a finalized object may lose the monitor because it is cleared before the finalizer is
1925 null_link_in_range ((void**)start_addr, (void**)end_addr);
1927 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/* pin count of the previous collection, reported on allocation failure */
1930 static int last_num_pinned = 0;
/*
 * Walk pin_queue[start_pin..end_pin): unpin each surviving object, record
 * scan-start entries, and turn the gaps between pinned objects (and the tail
 * up to nursery_real_end) into reusable nursery fragments.
 * NOTE(review): listing excerpt — elided lines hide locals/braces.
 */
1933 build_nursery_fragments (int start_pin, int end_pin)
1935 char *frag_start, *frag_end;
1939 /* FIXME: handle non-NULL fragment_freelist */
/* recycle last collection's fragment descriptors */
1940 fragment_freelist = nursery_fragments;
1941 nursery_fragments = NULL;
1942 frag_start = nursery_start;
1944 /* clear scan starts */
1945 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
1946 for (i = start_pin; i < end_pin; ++i) {
1947 frag_end = pin_queue [i];
1948 /* remove the pin bit from pinned objects */
1949 unpin_object (frag_end);
1950 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
1951 frag_size = frag_end - frag_start;
1953 add_nursery_frag (frag_size, frag_start, frag_end);
/* advance past the pinned object, rounded up to ALLOC_ALIGN */
1954 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
1955 frag_size += ALLOC_ALIGN - 1;
1956 frag_size &= ~(ALLOC_ALIGN - 1);
1957 frag_start = (char*)pin_queue [i] + frag_size;
1959 nursery_last_pinned_end = frag_start;
/* the tail after the last pinned object is one more fragment */
1960 frag_end = nursery_real_end;
1961 frag_size = frag_end - frag_start;
1963 add_nursery_frag (frag_size, frag_start, frag_end);
1964 if (!nursery_fragments) {
1965 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
1966 for (i = start_pin; i < end_pin; ++i) {
1967 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
/*
 * Major-collection analogue of build_nursery_fragments for a non-nursery
 * section: unpin survivors, rebuild scan starts, zero the holes between
 * pinned objects and update section->next_data.  Holes are only zeroed
 * (not tracked as fragments) — see the FIXME below.
 * NOTE(review): listing excerpt — elided lines hide locals/braces.
 */
1973 /* FIXME: later reduce code duplication here with the above
1974 * We don't keep track of section fragments for non-nursery sections yet, so
1978 build_section_fragments (GCMemSection *section)
1981 char *frag_start, *frag_end;
1984 /* clear scan starts */
1985 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
1986 frag_start = section->data;
1987 section->next_data = section->data;
1988 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
1989 frag_end = pin_queue [i];
1990 /* remove the pin bit from pinned objects */
1991 unpin_object (frag_end);
/* clamp to the section end before indexing scan_starts */
1992 if (frag_end >= section->data + section->size) {
1993 frag_end = section->data + section->size;
1995 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
1997 frag_size = frag_end - frag_start;
1999 memset (frag_start, 0, frag_size);
/* skip over the pinned object, rounded up to ALLOC_ALIGN */
2000 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
2001 frag_size += ALLOC_ALIGN - 1;
2002 frag_size &= ~(ALLOC_ALIGN - 1);
2003 frag_start = (char*)pin_queue [i] + frag_size;
2004 section->next_data = MAX (section->next_data, frag_start);
/* zero the tail after the last pinned object */
2006 frag_end = section->end_data;
2007 frag_size = frag_end - frag_start;
2009 memset (frag_start, 0, frag_size);
/*
 * Minor (nursery) collection: set up a to-space large enough for the worst
 * case, pin conservatively-referenced nursery objects, copy survivors out of
 * the nursery from roots/remsets, drain the gray stack, then rebuild the
 * nursery free-fragment list.  World must already be stopped.
 * NOTE(review): listing excerpt — elided lines hide locals, braces and some
 * timing calls; code kept byte-identical.
 */
2013 * Collect objects in the nursery.
2016 collect_nursery (size_t requested_size)
2018 GCMemSection *section;
2019 size_t max_garbage_amount;
2026 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2027 /* FIXME: optimize later to use the higher address where an object can be present */
2028 nursery_next = MAX (nursery_next, nursery_real_end);
2030 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
/* worst case: everything in the nursery survives */
2031 max_garbage_amount = nursery_next - nursery_start;
2033 * not enough room in the old generation to store all the possible data from
2034 * the nursery in a single continuous space.
2035 * We reset to_space if we allocated objects in degraded mode.
2037 if (to_space_section)
2038 to_space = gray_objects = to_space_section->next_data;
/* grow the to-space section if the current one can't hold the worst case */
2039 if ((to_space_end - to_space) < max_garbage_amount) {
2040 section = alloc_section (nursery_section->size * 4);
2041 g_assert (nursery_section->size >= max_garbage_amount);
2042 to_space = gray_objects = section->next_data;
2043 to_space_end = section->end_data;
2044 to_space_section = section;
2046 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2047 nursery_section->next_data = nursery_next;
2050 /* world must be stopped already */
2052 /* pin from pinned handles */
2053 pin_from_roots (nursery_start, nursery_next);
2054 /* identify pinned objects */
2055 optimize_pin_queue (0);
2056 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2058 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2059 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2062 * walk all the roots and copy the young objects to the old generation,
2063 * starting from to_space
2066 scan_from_remsets (nursery_start, nursery_next);
2067 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2069 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2070 /* FIXME: later scan also alloc_pinned objects */
2072 /* the pinned objects are roots */
2073 for (i = 0; i < next_pin_slot; ++i) {
2074 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2075 scan_object (pin_queue [i], nursery_start, nursery_next);
2077 /* registered roots, this includes static fields */
2078 for (i = 0; i < roots_hash_size; ++i) {
2079 for (root = roots_hash [i]; root; root = root->next) {
2080 /* if desc is non-null it has precise info */
2081 if (!root->root_desc)
2083 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2084 precisely_scan_objects_from ((void**)root->start_root, root->end_root, nursery_start, nursery_next, root->root_desc);
2088 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/* transitively copy everything reachable from what was copied so far */
2090 drain_gray_stack (nursery_start, nursery_next);
2092 /* walk the pin_queue, build up the fragment list of free memory, unmark
2093 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2096 build_nursery_fragments (0, next_pin_slot);
2098 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %d bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2100 /* prepare the pin queue for the next collection */
2101 last_num_pinned = next_pin_slot;
/* wake the finalizer thread if this collection made objects fin-ready */
2103 if (fin_ready_list) {
2104 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2105 mono_gc_finalize_notify ();
/*
 * Major collection: currently a whole-heap copying collection (Mark/Compact
 * is a FIXME).  Pins conservatively-referenced objects in every section,
 * large object and pinned chunk, copies survivors into one big to-space
 * section, sweeps the LOS and pinned chunks, frees sections with no pinned
 * objects and rebuilds fragments for the rest.  World must be stopped.
 * NOTE(review): listing excerpt — elided lines hide locals, braces,
 * `continue`s and timing calls; code kept byte-identical.
 */
2110 major_collection (void)
2112 GCMemSection *section, *prev_section;
2113 LOSObject *bigobj, *prevbo;
2121 /* FIXME: only use these values for the precise scan
2122 * note that to_space pointers should be excluded anyway...
/* whole address space: every reference is "in range" for copying */
2124 char *heap_start = NULL;
2125 char *heap_end = (char*)-1;
2126 size_t copy_space_required = 0;
2129 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2132 * FIXME: implement Mark/Compact
2133 * Until that is done, we can just apply mostly the same alg as for the nursery:
2134 * this means we need a big section to potentially copy all the other sections, so
2135 * it is not ideal specially with large heaps.
/* escape hatch: degrade to a minor collection only */
2137 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2138 collect_nursery (0);
2141 /* FIXME: make sure the nursery next_data ptr is updated */
2142 nursery_section->next_data = nursery_real_end;
2143 /* we should also coalesce scanning from sections close to each other
2144 * and deal with pointers outside of the sections later.
2146 /* The remsets are not useful for a major collection */
2148 /* world must be stopped already */
2150 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2151 for (section = section_list; section; section = section->next) {
2152 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2153 pin_from_roots (section->data, section->next_data);
2154 if (count != next_pin_slot) {
2156 optimize_pin_queue (count);
2157 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2158 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2159 section->pin_queue_end = next_pin_slot = count + reduced_to;
/* worst case: all live data of this section must fit in the to-space */
2161 copy_space_required += (char*)section->next_data - (char*)section->data;
2163 /* identify possible pointers to the inside of large objects */
2164 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2165 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2166 count = next_pin_slot;
2167 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2168 /* FIXME: this is only valid until we don't optimize the pin queue midway */
/* any hit pins the whole large object; the queue entries are discarded */
2169 if (next_pin_slot != count) {
2170 next_pin_slot = count;
2171 pin_object (bigobj->data);
2172 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %d from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2175 /* look for pinned addresses for pinned-alloc objects */
2176 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2177 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2178 count = next_pin_slot;
2179 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2180 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2181 if (next_pin_slot != count) {
2182 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2183 next_pin_slot = count;
2188 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2189 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2191 /* allocate the big to space */
2192 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %d\n", copy_space_required));
2193 section = alloc_section (copy_space_required);
2194 to_space = gray_objects = section->next_data;
2195 to_space_end = section->end_data;
2196 to_space_section = section;
2198 /* the old generation doesn't need to be scanned (no remembered sets or card
2199 * table needed either): the only objects that must survive are those pinned and
2200 * those referenced by the precise roots.
2201 * mark any section without pinned objects, so we can free it since we will be able to
2202 * move all the objects.
2204 /* the pinned objects are roots */
2205 for (i = 0; i < next_pin_slot; ++i) {
2206 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2207 scan_object (pin_queue [i], heap_start, heap_end);
2209 /* registered roots, this includes static fields */
2210 for (i = 0; i < roots_hash_size; ++i) {
2211 for (root = roots_hash [i]; root; root = root->next) {
2212 /* if desc is non-null it has precise info */
2213 if (!root->root_desc)
2215 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2216 precisely_scan_objects_from ((void**)root->start_root, root->end_root, heap_start, heap_end, root->root_desc);
2219 /* scan the list of objects ready for finalization */
2220 for (fin = fin_ready_list; fin; fin = fin->next) {
2221 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2222 fin->object = copy_object (fin->object, heap_start, heap_end);
2225 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2227 /* all the objects in the heap */
2228 drain_gray_stack (heap_start, heap_end);
2230 /* sweep the big objects list */
/* unpin survivors; unlink and free unmarked large objects */
2232 for (bigobj = los_object_list; bigobj;) {
2233 if (object_is_pinned (bigobj->data)) {
2234 unpin_object (bigobj->data);
2237 /* not referenced anywhere, so we can free it */
2239 prevbo->next = bigobj->next;
2241 los_object_list = bigobj->next;
2243 bigobj = bigobj->next;
2244 free_large_object (to_free);
2248 bigobj = bigobj->next;
2250 /* unpin objects from the pinned chunks and free the unmarked ones */
2251 sweep_pinned_objects ();
2253 /* free the unused sections */
2254 prev_section = NULL;
2255 for (section = section_list; section;) {
2256 /* to_space doesn't need handling here and the nursery is special */
2257 if (section == to_space_section || section == nursery_section) {
2258 prev_section = section;
2259 section = section->next;
2262 /* no pinning object, so the section is free */
2263 if (section->pin_queue_start == section->pin_queue_end) {
2264 GCMemSection *to_free;
2266 prev_section->next = section->next;
2268 section_list = section->next;
2270 section = section->next;
2271 free_mem_section (to_free);
2274 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2275 build_section_fragments (section);
2277 prev_section = section;
2278 section = section->next;
2281 /* walk the pin_queue, build up the fragment list of free memory, unmark
2282 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2285 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2287 /* prepare the pin queue for the next collection */
2289 if (fin_ready_list) {
2290 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2291 mono_gc_finalize_notify ();
/*
 * Allocate a new old-generation section of at least @size bytes (rounded up
 * to the page size), with an adaptive default size that doubles after a few
 * allocations up to max_section_size, and link it into section_list.
 * NOTE(review): listing excerpt — elided lines hide locals, braces and the
 * return statement.
 */
2296 * Allocate a new section of memory to be used as old generation.
2298 static GCMemSection*
2299 alloc_section (size_t size)
2301 GCMemSection *section;
2304 size_t new_size = next_section_size;
2306 if (size > next_section_size) {
/* round the request up to a whole number of pages */
2308 new_size += pagesize - 1;
2309 new_size &= ~(pagesize - 1);
/* grow the default section size after every few allocations */
2311 section_size_used++;
2312 if (section_size_used > 3) {
2313 section_size_used = 0;
2314 next_section_size *= 2;
2315 if (next_section_size > max_section_size)
2316 next_section_size = max_section_size;
2318 section = get_internal_mem (sizeof (GCMemSection));
2319 data = get_os_memory (new_size, TRUE);
2320 section->data = section->next_data = data;
2321 section->size = new_size;
2322 section->end_data = data + new_size;
2323 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2324 total_alloc += new_size;
2325 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", new_size, total_alloc));
/* NOTE(review): data/size were already assigned above (line 2320/2321) —
 * these two stores look redundant; confirm against the full source */
2326 section->data = data;
2327 section->size = new_size;
2328 scan_starts = new_size / SCAN_START_SIZE;
2329 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2330 section->num_scan_start = scan_starts;
2332 /* add to the section list */
2333 section->next = section_list;
2334 section_list = section;
/*
 * Return a section's memory to the OS, free its descriptor and account for
 * the released bytes.  NOTE(review): scan_starts is not freed here in the
 * visible lines — possible leak; confirm against the full source.
 */
2340 free_mem_section (GCMemSection *section)
2342 char *data = section->data;
2343 size_t size = section->size;
2344 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %d\n", data, size));
2345 free_os_memory (data, size);
2346 free_internal_mem (section);
2347 total_alloc -= size;
/*
 * Called when a small-object allocation of @size failed: run a nursery
 * collection and re-search the fragments; log the pinned objects if there is
 * still not enough room.  NOTE(review): listing excerpt — elided lines hide
 * locals, braces and the expansion path suggested by the comment below.
 */
2351 * When deciding if it's better to collect or to expand, keep track
2352 * of how much garbage was reclaimed with the last collection: if it's too
2354 * This is called when we could not allocate a small object.
2356 static void __attribute__((noinline))
2357 minor_collect_or_expand_inner (size_t size)
2359 int do_minor_collection = 1;
2361 if (!nursery_section) {
2365 if (do_minor_collection) {
2367 collect_nursery (size);
2368 DEBUG (2, fprintf (gc_debug_file, "Heap size: %d, LOS size: %d\n", total_alloc, los_memory_usage));
2370 /* this also sets the proper pointers for the next allocation */
2371 if (!search_fragment_for_size (size)) {
2373 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2374 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %d alloc (%d pinned)", size, last_num_pinned));
2375 for (i = 0; i < last_num_pinned; ++i) {
2376 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2381 //report_internal_mem_usage ();
2385 * ######################################################################
2386 * ######## Memory allocation from the OS
2387 * ######################################################################
2388 * This section of code deals with getting memory from the OS and
2389 * allocating memory for GC-internal data structures.
2390 * Internal memory can be handled with a freelist for small objects.
2393 #ifndef MAP_ANONYMOUS
2394 #define MAP_ANONYMOUS MAP_ANON
2398 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2399 * This must not require any lock.
/*
 * mmap-based page allocator: request @size bytes (rounded up to a page)
 * from the OS; @activate selects read/write vs. inaccessible pages.
 * Must not take any lock (see comment in the elided header above).
 */
2402 get_os_memory (size_t size, int activate)
2405 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2407 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
2408 size += pagesize - 1;
2409 size &= ~(pagesize - 1);
2410 ptr = mono_valloc (0, size, prot_flags);
/* Counterpart of get_os_memory (): return the pages to the OS via munmap. */
2415 * Free the memory returned by get_os_memory (), returning it to the OS.
2418 free_os_memory (void *addr, size_t size)
2420 munmap (addr, size);
/*
 * Debug helper: print page usage and per-slot free-list statistics for one
 * pinned chunk.  NOTE(review): listing excerpt — elided lines hide the
 * free-page counting and list-walk bodies.
 */
2427 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2429 int i, free_pages, num_free, free_mem;
2431 for (i = 0; i < chunk->num_pages; ++i) {
2432 if (!chunk->page_sizes [i])
2435 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2436 free_mem = FREELIST_PAGESIZE * free_pages;
2437 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2438 if (!chunk->free_list [i])
2441 p = chunk->free_list [i];
2446 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2447 free_mem += freelist_sizes [i] * num_free;
2449 printf ("\tfree memory in chunk: %d\n", free_mem);
/* Debug helper: dump chunk statistics for both the GC-internal and the
 * pinned-object chunk lists. */
2456 report_internal_mem_usage (void) {
2459 printf ("Internal memory usage:\n");
2461 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2462 report_pinned_chunk (chunk, i++);
2464 printf ("Pinned memory usage:\n");
2466 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2467 report_pinned_chunk (chunk, i++);
/*
 * Given conservative pointers in [@start, @end) that fall inside @chunk,
 * round each address down to the start of its allocation slot and pin that
 * object (skipping unallocated pages and free-list entries).
 * NOTE(review): listing excerpt — elided lines hide braces/continues.
 */
2472 * the array of pointers from @start to @end contains conservative
2473 * pointers to objects inside @chunk: mark each referenced object
2477 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2479 for (; start < end; start++) {
2480 char *addr = *start;
2481 int offset = (char*)addr - (char*)chunk;
2482 int page = offset / FREELIST_PAGESIZE;
/* page 0 starts at start_data (after the bookkeeping header) */
2483 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2484 int slot_size = chunk->page_sizes [page];
2486 /* the page is not allocated */
2489 /* would be faster if we restrict the sizes to power of two,
2490 * but that's a waste of memory: need to measure. it could reduce
2491 * fragmentation since there are less pages needed, if for example
2492 * someone interns strings of each size we end up with one page per
2493 * interned string (still this is just ~40 KB): with more fine-grained sizes
2494 * this increases the number of used pages.
/* round down to the slot boundary to get the object start */
2497 obj_offset /= slot_size;
2498 obj_offset *= slot_size;
2499 addr = (char*)chunk->start_data + obj_offset;
2501 obj_offset /= slot_size;
2502 obj_offset *= slot_size;
2503 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2506 /* if the vtable is inside the chunk it's on the freelist, so skip */
2507 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2509 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
/*
 * Walk every slot of every page of every pinned chunk: unpin objects that
 * survived the major collection, and log (FIXME: eventually free) the
 * unmarked ones.  A slot whose first word points inside the chunk is a
 * free-list link, not an object, and is skipped.
 * NOTE(review): "ranhe" in the debug string below is a typo for "range";
 * left untouched here since a doc-only edit must not change runtime strings.
 */
2515 sweep_pinned_objects (void)
2522 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2523 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2524 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (ranhe: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2525 for (i = 0; i < chunk->num_pages; ++i) {
2526 obj_size = chunk->page_sizes [i];
/* page 0 holds the chunk header, so its data starts at start_data */
2529 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2530 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2531 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2532 while (p + obj_size <= endp) {
2534 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2535 /* if the first word (the vtable) is outside the chunk we have an object */
2536 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2537 if (object_is_pinned (ptr)) {
2539 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2541 /* FIXME: add to freelist */
2542 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
/*
 * Map an allocation size to the index of the smallest free-list slot class
 * that can hold it; aborts if @size exceeds the largest slot class.
 */
2552 * Find the slot number in the freelist for memory chunks that
2553 * can contain @size objects.
2556 slot_for_size (size_t size)
2559 /* do a binary search or lookup table later. */
2560 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2561 if (freelist_sizes [slot] >= size)
2564 g_assert_not_reached ();
/*
 * Carve [start_page, end_page) into @size-byte slots and thread them into
 * chunk->free_list[slot] as a singly-linked list (each free slot's first
 * word points to the next free slot).
 */
2569 * Build a free list for @size memory chunks from the memory area between
2570 * start_page and end_page.
2573 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
2577 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
2579 end = (void**)(end_page - size);
2580 g_assert (!chunk->free_list [slot]);
2581 chunk->free_list [slot] = p;
2582 while ((char*)p + size <= end) {
2584 *p = (void*)((char*)p + size);
2588 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
/*
 * Allocate a new PinnedChunk from the OS (page-rounded, at least
 * PINNED_CHUNK_MIN_SIZE * 2), lay out its bookkeeping arrays (page_sizes,
 * free_list) at the front, and seed the first page's free list.
 */
2592 alloc_pinned_chunk (size_t size)
2597 size += pagesize; /* at least one page */
2598 size += pagesize - 1;
2599 size &= ~(pagesize - 1);
2600 if (size < PINNED_CHUNK_MIN_SIZE * 2)
2601 size = PINNED_CHUNK_MIN_SIZE * 2;
2602 chunk = get_os_memory (size, TRUE);
2603 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
2604 total_alloc += size;
2606 /* setup the bookkeeping fields */
2607 chunk->num_pages = size / FREELIST_PAGESIZE;
/* page_sizes and free_list live inside the chunk itself, ALLOC_ALIGNed */
2608 offset = G_STRUCT_OFFSET (PinnedChunk, data);
2609 chunk->page_sizes = (void*)((char*)chunk + offset);
2610 offset += sizeof (int) * chunk->num_pages;
2611 offset += ALLOC_ALIGN - 1;
2612 offset &= ~(ALLOC_ALIGN - 1);
2613 chunk->free_list = (void*)((char*)chunk + offset);
2614 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
2615 offset += ALLOC_ALIGN - 1;
2616 offset &= ~(ALLOC_ALIGN - 1);
2617 chunk->start_data = (void*)((char*)chunk + offset);
2619 /* allocate the first page to the freelist */
2620 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
2621 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
2622 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %d\n", chunk, size));
/* keep the global pinned-chunk address range up to date */
2623 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
2624 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
/*
 * Pop an entry from chunk->free_list[slot]; if the list is empty, claim the
 * first unallocated page of the chunk for that slot size, build its free
 * list, and pop from it.  NOTE(review): listing excerpt — elided lines hide
 * the return statements and the "no free page" fall-through.
 */
2628 /* assumes freelist for slot is empty, so try to alloc a new page */
2630 get_chunk_freelist (PinnedChunk *chunk, int slot)
2634 p = chunk->free_list [slot];
/* the first word of a free entry links to the next one */
2636 chunk->free_list [slot] = *p;
2639 for (i = 0; i < chunk->num_pages; ++i) {
2641 if (chunk->page_sizes [i])
2643 size = freelist_sizes [slot];
2644 chunk->page_sizes [i] = size;
2645 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
2649 p = chunk->free_list [slot];
2651 chunk->free_list [slot] = *p;
/*
 * Allocate a pinned object of @size: try each chunk's ready free list, then
 * try growing a free list in an existing chunk, and finally allocate a brand
 * new pinned chunk.  NOTE(review): listing excerpt — return statements are
 * among the elided lines.
 */
2658 alloc_from_freelist (size_t size)
2662 PinnedChunk *pchunk;
2663 slot = slot_for_size (size);
2664 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
2665 g_assert (size <= freelist_sizes [slot]);
/* fast path: a chunk already has a non-empty free list for this slot */
2666 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2667 void **p = pchunk->free_list [slot];
2669 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
2670 pchunk->free_list [slot] = *p;
/* slow path: let a chunk carve a fresh page for this slot size */
2674 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2675 res = get_chunk_freelist (pchunk, slot);
/* last resort: grow the pinned heap with a new chunk */
2679 pchunk = alloc_pinned_chunk (size);
2680 /* FIXME: handle OOM */
2681 pchunk->next = pinned_chunk_list;
2682 pinned_chunk_list = pchunk;
2683 res = get_chunk_freelist (pchunk, slot);
/*
 * Allocate zeroed memory for GC-internal data structures.  Currently
 * short-circuits to calloc (line 2694); the chunk-based code below it is
 * unreachable until that early return is removed.
 */
2687 /* used for the GC-internal data structures */
2688 /* FIXME: add support for bigger sizes by allocating more than one page
2692 get_internal_mem (size_t size)
2694 return calloc (1, size);
/* dead code below while the calloc return above is in place */
2697 PinnedChunk *pchunk;
2698 slot = slot_for_size (size);
2699 g_assert (size <= freelist_sizes [slot]);
2700 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2701 void **p = pchunk->free_list [slot];
2703 pchunk->free_list [slot] = *p;
2707 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2708 res = get_chunk_freelist (pchunk, slot);
2712 pchunk = alloc_pinned_chunk (size);
2713 /* FIXME: handle OOM */
2714 pchunk->next = internal_chunk_list;
2715 internal_chunk_list = pchunk;
2716 res = get_chunk_freelist (pchunk, slot);
/*
 * Return GC-internal memory to its chunk's free list by locating the chunk
 * that contains @addr; aborts if the address belongs to no internal chunk.
 * NOTE(review): get_internal_mem currently returns calloc memory, so the
 * matching free path is presumably among the elided lines — confirm.
 */
2721 free_internal_mem (void *addr)
2725 PinnedChunk *pchunk;
2726 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2727 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
2728 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
2729 int offset = (char*)addr - (char*)pchunk;
2730 int page = offset / FREELIST_PAGESIZE;
2731 int slot = slot_for_size (pchunk->page_sizes [page]);
/* push the slot back on the chunk's free list */
2733 *p = pchunk->free_list [slot];
2734 pchunk->free_list [slot] = p;
2738 printf ("free of %p failed\n", addr);
2739 g_assert_not_reached ();
2743 * ######################################################################
2744 * ######## Object allocation
2745 * ######################################################################
2746 * This section of code deals with allocating memory for objects.
2747 * There are several ways:
2748 * *) allocate large objects
2749 * *) allocate normal objects
2750 * *) fast lock-free allocation
2751 * *) allocation of pinned objects
/*
 * Free one large object: account for its bytes in los_memory_usage and
 * total_alloc, then return its page-rounded allocation (header + data)
 * to the OS.
 */
2755 free_large_object (LOSObject *obj)
2757 size_t size = obj->size;
2758 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %d\n", obj->data, obj->size));
2760 los_memory_usage -= size;
/* recompute the page-rounded size originally requested in alloc_large_inner */
2761 size += sizeof (LOSObject);
2762 size += pagesize - 1;
2763 size &= ~(pagesize - 1);
2764 total_alloc -= size;
2766 free_os_memory (obj, size);
/*
 * Allocate a large object directly from the OS (LOSObject header + data,
 * page-rounded), link it into los_object_list and store the vtable in its
 * first word.  May trigger a major collection first when los_memory_usage
 * crosses next_los_collection.  Large objects never move.
 * NOTE(review): listing excerpt — elided lines hide locals/braces.
 */
2770 * Objects with size >= 64KB are allocated in the large object space.
2771 * They are currently kept track of with a linked list.
2772 * They don't move, so there is no need to pin them during collection
2773 * and we avoid the memcpy overhead.
2775 static void* __attribute__((noinline))
2776 alloc_large_inner (MonoVTable *vtable, size_t size)
2781 int just_did_major_gc = FALSE;
2783 if (los_memory_usage > next_los_collection) {
2784 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %d (los already: %u, limit: %u)\n", size, los_memory_usage, next_los_collection));
2785 just_did_major_gc = TRUE;
2787 major_collection ();
2789 /* later increase based on a percent of the heap size */
2790 next_los_collection = los_memory_usage + 5*1024*1024;
2793 alloc_size += sizeof (LOSObject);
2794 alloc_size += pagesize - 1;
2795 alloc_size &= ~(pagesize - 1);
2796 /* FIXME: handle OOM */
2797 obj = get_os_memory (alloc_size, TRUE);
/* first word of the object is its vtable */
2799 vtslot = (void**)obj->data;
2801 total_alloc += alloc_size;
2802 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
2803 obj->next = los_object_list;
2804 los_object_list = obj;
2805 los_memory_usage += size;
2807 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %d\n", obj->data, vtable, vtable->klass->name, size));
2811 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
2812 * an object of size @size
2813 * Return FALSE if not found (which means we need a collection)
/*
 * On success the chosen fragment is unlinked from nursery_fragments,
 * its bounds become the current bump-pointer window (nursery_next /
 * nursery_frag_real_end / nursery_temp_end) and the Fragment node is
 * recycled onto fragment_freelist.
 * NOTE(review): excerpt omits the prev-tracking and return lines.
 */
2816 search_fragment_for_size (size_t size)
2818 Fragment *frag, *prev;
2819 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %d\n", nursery_frag_real_end, size));
/* first-fit scan of the fragment list */
2821 for (frag = nursery_fragments; frag; frag = frag->next) {
2822 if (size <= (frag->fragment_end - frag->fragment_start)) {
2823 /* remove from the list */
2825 prev->next = frag->next;
2827 nursery_fragments = frag->next;
2828 nursery_next = frag->fragment_start;
2829 nursery_frag_real_end = frag->fragment_end;
/* temp end is capped so we periodically record scan starts */
2830 nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + size + SCAN_START_SIZE);
2832 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %d (req: %d)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
/* recycle the Fragment bookkeeping node */
2833 frag->next = fragment_freelist;
2834 fragment_freelist = frag;
2843 * size is already rounded up.
/*
 * Degraded-mode allocation: allocate directly in a major-heap section
 * (bypassing the nursery). Used when nursery collections are not making
 * progress; degraded_mode accumulates the bytes allocated this way.
 * A new section (4x nursery size) is created when no existing section
 * has room. NOTE(review): excerpt omits the vtable store and return.
 */
2846 alloc_degraded (MonoVTable *vtable, size_t size)
2848 GCMemSection *section;
/* look for any non-nursery section with enough free room */
2850 for (section = section_list; section; section = section->next) {
2851 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
2852 p = section->next_data;
2857 section = alloc_section (nursery_section->size * 4);
2858 /* FIXME: handle OOM */
2859 p = section->next_data;
2861 section->next_data += size;
2862 degraded_mode += size;
2863 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %d in section %p\n", p, vtable, vtable->klass->name, size, section));
2869 * Provide a variant that takes just the vtable for small fixed-size objects.
2870 * The aligned size is already computed and stored in vt->gc_descr.
2871 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
2872 * processing. We can keep track of where objects start, for example,
2873 * so when we scan the thread stacks for pinned objects, we can start
2874 * a search for the pinned object in SCAN_START_SIZE chunks.
/*
 * Main allocation entry point: bump-pointer allocation in the nursery,
 * with slow paths for (a) objects larger than MAX_SMALL_OBJ_SIZE (LOS),
 * (b) exhausted fragment -> find another fragment or collect/expand,
 * (c) degraded mode. The nursery window is [nursery_next, nursery_temp_end);
 * crossing nursery_temp_end either means a real overflow or just that a
 * scan-start boundary was reached.
 * NOTE(review): this excerpt omits many original lines (declarations of
 * p/dummy, the vtable store, several closing braces) — visible code is
 * kept byte-identical; do not assume the control flow shown is complete.
 */
2877 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
2879 /* FIXME: handle OOM */
/* round the request up to ALLOC_ALIGN (power-of-two mask trick) */
2882 size += ALLOC_ALIGN - 1;
2883 size &= ~(ALLOC_ALIGN - 1);
2885 g_assert (vtable->gc_descr);
/* fast path: claim [nursery_next, nursery_next + size) */
2888 p = (void**)nursery_next;
2889 /* FIXME: handle overflow */
2890 nursery_next += size;
2891 if (nursery_next >= nursery_temp_end) {
2892 /* there are two cases: the object is too big or we need to collect */
2893 /* there can be another case (from ORP), if we cooperate with the runtime a bit:
2894 * objects that need finalizers can have the high bit set in their size
2895 * so the above check fails and we can readily add the object to the queue.
2896 * This avoids taking again the GC lock when registering, but this is moot when
2897 * doing thread-local allocation, so it may not be a good idea.
2899 if (size > MAX_SMALL_OBJ_SIZE) {
2900 /* get ready for possible collection */
2901 update_current_thread_stack (&dummy);
/* undo the speculative bump before going to the LOS allocator */
2902 nursery_next -= size;
2903 p = alloc_large_inner (vtable, size);
2905 if (nursery_next >= nursery_frag_real_end) {
2906 nursery_next -= size;
2907 /* when running in degraded mode, we continue allocing that way
2908 * for a while, to decrease the number of useless nursery collections.
2910 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
2911 p = alloc_degraded (vtable, size);
2915 if (!search_fragment_for_size (size)) {
2916 /* get ready for possible collection */
2917 update_current_thread_stack (&dummy);
2918 minor_collect_or_expand_inner (size);
2919 if (degraded_mode) {
2920 p = alloc_degraded (vtable, size);
2925 /* nursery_next changed by minor_collect_or_expand_inner () */
/* retry the bump allocation in the refreshed nursery window */
2926 p = (void*)nursery_next;
2927 nursery_next += size;
2928 if (nursery_next > nursery_temp_end) {
2933 /* record the scan start so we can find pinned objects more easily */
2934 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = p;
2935 /* we just bump nursery_temp_end as well */
2936 nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + SCAN_START_SIZE);
2937 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", nursery_next, nursery_temp_end));
2941 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size));
2950 * To be used for interned strings and possibly MonoThread, reflection handles.
2951 * We may want to explicitly free these objects.
/*
 * Allocate a pinned (non-moving) object: large requests go to the LOS
 * (large objects never move anyway), small ones come from the pinned
 * free lists and are zeroed explicitly since free-list memory may be
 * recycled. NOTE(review): excerpt omits the vtable store and return.
 */
2954 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
2956 /* FIXME: handle OOM */
/* align the request like the normal allocator does */
2958 size += ALLOC_ALIGN - 1;
2959 size &= ~(ALLOC_ALIGN - 1);
2961 if (size > MAX_FREELIST_SIZE) {
2962 update_current_thread_stack (&p);
2963 /* large objects are always pinned anyway */
2964 p = alloc_large_inner (vtable, size);
2966 p = alloc_from_freelist (size);
2967 memset (p, 0, size);
2969 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size));
2976 * ######################################################################
2977 * ######## Finalization support
2978 * ######################################################################
2982 * this is valid for the nursery: if the object has been forwarded it means it's
2983 * still refrenced from a root. If it is pinned it's still alive as well.
2984 * Return TRUE if @obj is ready to be finalized.
2986 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
/*
 * Walk the finalizable hash and, for entries whose object lies in the
 * collected range [@start, @end): move dead-but-finalizable objects to
 * fin_ready_list (copying them so they survive this collection), and
 * update the stored pointer for objects that merely moved.
 * NOTE(review): excerpt omits prev-tracking and loop-advance lines;
 * visible code kept byte-identical.
 */
2989 finalize_in_range (void **start, void **end)
2991 FinalizeEntry *entry, *prev;
2995 for (i = 0; i < finalizable_hash_size; ++i) {
2997 for (entry = finalizable_hash [i]; entry;) {
2998 if (entry->object >= start && entry->object < end) {
2999 if (object_is_fin_ready (entry->object)) {
3001 FinalizeEntry *next;
3002 /* remove and put in fin_ready_list */
3004 prev->next = entry->next;
3006 finalizable_hash [i] = entry->next;
3008 num_ready_finalizers++;
3009 num_registered_finalizers--;
3010 entry->next = fin_ready_list;
3011 fin_ready_list = entry;
3012 /* Make it survive */
/* the object is unreachable but must live until its finalizer runs */
3013 from = entry->object;
3014 entry->object = copy_object (entry->object, start, end);
3015 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3019 /* update pointer */
3020 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
/* live object: just chase the forwarding pointer */
3021 entry->object = copy_object (entry->object, start, end);
3025 entry = entry->next;
/*
 * Process the disappearing-link (weak reference) hash for the collected
 * range [@start, @end): links to objects that died are removed and the
 * referenced location is nulled; links to objects that moved are updated
 * in place. NOTE(review): excerpt omits prev-tracking, the *p = NULL
 * store and loop-advance lines; visible code kept byte-identical.
 */
3031 null_link_in_range (void **start, void **end)
3033 FinalizeEntry *entry, *prev;
3035 for (i = 0; i < disappearing_link_hash_size; ++i) {
3037 for (entry = disappearing_link_hash [i]; entry;) {
3038 if (entry->object >= start && entry->object < end) {
3039 if (object_is_fin_ready (entry->object)) {
/* target died: entry->data holds the address of the weak slot */
3040 void **p = entry->data;
3043 /* remove from list */
3045 prev->next = entry->next;
3047 disappearing_link_hash [i] = entry->next;
3048 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, entry->object));
3050 free_internal_mem (entry);
3055 /* update pointer if it's moved
3056 * FIXME: what if an object is moved earlier?
3058 entry->object = copy_object (entry->object, start, end);
3059 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->data, entry->object));
/* write the (possibly new) address back through the weak slot */
3061 *link = entry->object;
3065 entry = entry->next;
3071 * mono_gc_finalizers_for_domain:
3072 * @domain: the unloading appdomain
3073 * @out_array: output array
3074 * @out_size: size of output array
3076 * Store inside @out_array up to @out_size objects that belong to the unloading
3077 * appdomain @domain. Returns the number of stored items. Can be called repeteadly
3078 * until it returns 0.
3079 * The items are removed from the finalizer data structure, so the caller is supposed
3081 * @out_array should be on the stack to allow the GC to know the objects are still alive.
/* NOTE(review): excerpt omits prev-tracking, count init and the final
 * return; visible code kept byte-identical. */
3084 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3086 FinalizeEntry *entry, *prev;
/* nothing to do when finalization is disabled or the output buffer is unusable */
3088 if (no_finalize || !out_size || !out_array)
3092 for (i = 0; i < finalizable_hash_size; ++i) {
3094 for (entry = finalizable_hash [i]; entry;) {
3095 if (mono_object_domain (entry->object) == domain) {
3096 FinalizeEntry *next;
3097 /* remove and put in out_array */
3099 prev->next = entry->next;
3101 finalizable_hash [i] = entry->next;
3103 num_registered_finalizers--;
3104 out_array [count ++] = entry->object;
3105 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
/* buffer full: stop here; the caller will call again for the rest */
3107 if (count == out_size) {
3114 entry = entry->next;
/*
 * Grow the finalizable hash to a prime-sized table based on the current
 * number of registered finalizers, re-inserting every entry at its new
 * bucket. NOTE(review): excerpt omits the next = entry->next line and
 * the zeroing of new_hash; visible code kept byte-identical.
 */
3122 rehash_fin_table (void)
3126 FinalizeEntry **new_hash;
3127 FinalizeEntry *entry, *next;
3128 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3130 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3131 for (i = 0; i < finalizable_hash_size; ++i) {
3132 for (entry = finalizable_hash [i]; entry; entry = next) {
/* rebucket by object hash modulo the new size */
3133 hash = mono_object_hash (entry->object) % new_size;
3135 entry->next = new_hash [hash];
3136 new_hash [hash] = entry;
/* swap in the new table and free the old one */
3139 free_internal_mem (finalizable_hash);
3140 finalizable_hash = new_hash;
3141 finalizable_hash_size = new_size;
/*
 * Register (user_data != NULL) or deregister (semantics depend on the
 * omitted lines) a finalizer for @obj. An existing entry for the same
 * object is either updated in place or unlinked and freed. The table is
 * grown when the load factor reaches 2.
 * NOTE(review): excerpt omits prev-tracking, the NULL-user_data branch
 * condition and early returns; visible code kept byte-identical.
 */
3145 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3147 FinalizeEntry *entry, *prev;
3151 hash = mono_object_hash (obj);
3153 if (num_registered_finalizers >= finalizable_hash_size * 2)
3154 rehash_fin_table ();
3155 hash %= finalizable_hash_size;
/* look for an existing registration for this object */
3157 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3158 if (entry->object == obj) {
3160 entry->data = user_data;
3162 /* remove from the list */
3164 prev->next = entry->next;
3166 finalizable_hash [hash] = entry->next;
3167 num_registered_finalizers--;
3168 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3169 free_internal_mem (entry);
3177 /* request to deregister, but already out of the list */
/* not found: create a fresh entry and push it on the bucket */
3181 entry = get_internal_mem (sizeof (FinalizeEntry));
3182 entry->object = obj;
3183 entry->data = user_data;
3184 entry->next = finalizable_hash [hash];
3185 finalizable_hash [hash] = entry;
3186 num_registered_finalizers++;
3187 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
/*
 * Grow the disappearing-link hash, mirroring rehash_fin_table () but
 * hashing on the link address (entry->data) rather than the object.
 * NOTE(review): excerpt omits the next = entry->next line; visible code
 * kept byte-identical.
 */
3192 rehash_dislink (void)
3196 FinalizeEntry **new_hash;
3197 FinalizeEntry *entry, *next;
3198 int new_size = g_spaced_primes_closest (num_disappearing_links);
3200 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3201 for (i = 0; i < disappearing_link_hash_size; ++i) {
3202 for (entry = disappearing_link_hash [i]; entry; entry = next) {
/* rebucket by aligned link address */
3203 hash = mono_aligned_addr_hash (entry->data) % new_size;
3205 entry->next = new_hash [hash];
3206 new_hash [hash] = entry;
3209 free_internal_mem (disappearing_link_hash);
3210 disappearing_link_hash = new_hash;
3211 disappearing_link_hash_size = new_size;
/*
 * Register a weak reference: @link is the address of a slot that should
 * be nulled when @obj dies (see null_link_in_range). Passing the same
 * @link again updates the target object; a NULL @obj removes the link.
 * NOTE(review): excerpt omits prev-tracking, the rehash_dislink () call
 * and the entry->data = link store; visible code kept byte-identical.
 */
3215 mono_gc_register_disappearing_link (MonoObject *obj, void *link)
3217 FinalizeEntry *entry, *prev;
3221 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3223 /* FIXME: add check that link is not in the heap */
3224 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3225 entry = disappearing_link_hash [hash];
3227 for (; entry; entry = entry->next) {
3228 /* link already added */
3229 if (link == entry->data) {
3230 /* NULL obj means remove */
3233 prev->next = entry->next;
3235 disappearing_link_hash [hash] = entry->next;
3236 num_disappearing_links--;
3237 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3238 free_internal_mem (entry);
3240 entry->object = obj; /* we allow the change of object */
/* new link: allocate an entry and push it on the bucket */
3247 entry = get_internal_mem (sizeof (FinalizeEntry));
3248 entry->object = obj;
3250 entry->next = disappearing_link_hash [hash];
3251 disappearing_link_hash [hash] = entry;
3252 num_disappearing_links++;
3253 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
/*
 * Drain fin_ready_list, invoking each entry's finalizer callback
 * (entry->data) with the object. The object is held in a stack local
 * so the conservative scan keeps it pinned while the callback runs.
 * NOTE(review): excerpt omits locking and the count/return lines;
 * visible code kept byte-identical.
 */
3258 mono_gc_invoke_finalizers (void)
3260 FinalizeEntry *entry;
3263 /* FIXME: batch to reduce lock contention */
3264 while (fin_ready_list) {
/* pop the head of the ready list */
3266 entry = fin_ready_list;
3268 fin_ready_list = entry->next;
3269 num_ready_finalizers--;
3270 obj = entry->object;
3271 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
3275 void (*callback)(void *, void*) = entry->data;
3277 obj = entry->object;
3279 /* the object is on the stack so it is pinned */
3280 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3281 callback (obj, NULL);
3282 free_internal_mem (entry);
/* TRUE when there are queued objects whose finalizers have not run yet. */
3289 mono_gc_pending_finalizers (void)
3291 return fin_ready_list != NULL;
3294 /* Negative value to remove */
/* Adjust the externally-reported memory pressure counter by @value.
 * Not thread-safe yet (see FIXME below). */
3296 mono_gc_add_memory_pressure (gint64 value)
3298 /* FIXME: Use interlocked functions */
3300 memory_pressure += value;
3305 * ######################################################################
3306 * ######## registered roots support
3307 * ######################################################################
/*
 * NOTE(review): the signature line of this function is not present in
 * this excerpt — presumably a rehash of the registered-roots hash,
 * mirroring rehash_fin_table ()/rehash_dislink (); confirm against the
 * full source. Visible code kept byte-identical.
 */
3315 RootRecord **new_hash;
3316 RootRecord *entry, *next;
3317 int new_size = g_spaced_primes_closest (num_roots_entries);
3319 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3320 for (i = 0; i < roots_hash_size; ++i) {
3321 for (entry = roots_hash [i]; entry; entry = next) {
/* rebucket by the root's start address */
3322 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3324 entry->next = new_hash [hash];
3325 new_hash [hash] = entry;
3328 free_internal_mem (roots_hash);
3329 roots_hash = new_hash;
3330 roots_hash_size = new_size;
3334 * We do not coalesce roots.
/*
 * Register [start, start+size) as a GC root described by @descr.
 * Re-registering the same start address updates the size/descriptor in
 * place (used for thread statics). roots_size tracks total root bytes.
 * NOTE(review): excerpt omits the rehash call, roots_size increment and
 * returns; visible code kept byte-identical.
 */
3337 mono_gc_register_root (char *start, size_t size, void *descr)
3339 RootRecord *new_root;
3340 unsigned int hash = mono_aligned_addr_hash (start);
3342 if (num_roots_entries >= roots_hash_size * 2)
3344 hash %= roots_hash_size;
3345 for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) {
3346 /* we allow changing the size and the descriptor (for thread statics etc) */
3347 if (new_root->start_root == start) {
3348 size_t old_size = new_root->end_root - new_root->start_root;
3349 new_root->end_root = new_root->start_root + size;
3350 new_root->root_desc = (mword)descr;
3352 roots_size -= old_size;
/* not found: create a fresh record and push it on the bucket */
3357 new_root = get_internal_mem (sizeof (RootRecord));
3359 new_root->start_root = start;
3360 new_root->end_root = new_root->start_root + size;
3361 new_root->root_desc = (mword)descr;
3363 num_roots_entries++;
3364 new_root->next = roots_hash [hash];
3365 roots_hash [hash] = new_root;
3366 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * Remove the root record whose range starts at @addr, if registered,
 * unlinking it from its hash bucket and releasing its memory.
 * NOTE(review): excerpt omits the bucket-walk loop advance and closing
 * braces; visible code kept byte-identical.
 */
3376 mono_gc_deregister_root (char* addr)
3378 RootRecord *tmp, *prev = NULL;
3379 unsigned int hash = mono_aligned_addr_hash (addr);
3381 hash %= roots_hash_size;
3382 tmp = roots_hash [hash];
3384 if (tmp->start_root == (char*)addr) {
3386 prev->next = tmp->next;
3388 roots_hash [hash] = tmp->next;
3389 roots_size -= (tmp->end_root - tmp->start_root);
3390 num_roots_entries--;
3391 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
3392 free_internal_mem (tmp);
3402 * ######################################################################
3403 * ######## Thread handling (stop/start code)
3404 * ######################################################################
/* Undo any pthread interposition macros so the real functions are
 * callable from the wrappers below. */
3407 #undef pthread_create
3409 #undef pthread_detach
/* Payload handed to gc_start_thread () so new threads self-register
 * before running user code. NOTE(review): struct fields partially
 * elided in this excerpt. */
3412 void *(*start_routine) (void *);
3416 } SgenThreadStartInfo;
3418 /* eventually share with MonoThread? */
3419 typedef struct _SgenThreadInfo SgenThreadInfo;
/* Per-thread GC bookkeeping, kept in a small chained hash keyed by
 * pthread id. */
3421 struct _SgenThreadInfo {
3422 SgenThreadInfo *next;
3424 unsigned int stop_count; /* to catch duplicate signals */
3429 RememberedSet **remset;
3432 /* FIXME: handle large/small config */
3433 #define THREAD_HASH_SIZE 11
/* Knuth-style multiplicative hash of the pthread id */
3434 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
3436 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
/* posted by suspended/resumed threads so the collector can count acks */
3437 static sem_t suspend_ack_semaphore;
3438 static unsigned int global_stop_count = 0;
/* signals used to implement stop-the-world (same choice as Boehm GC) */
3439 static int suspend_signal_num = SIGPWR;
3440 static int restart_signal_num = SIGXCPU;
3441 static sigset_t suspend_signal_mask;
/* scratch area where the current thread dumps its registers for
 * conservative scanning */
3442 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
3444 /* LOCKING: assumes the GC lock is held */
/* Find the SgenThreadInfo for pthread @id by walking its hash bucket.
 * NOTE(review): excerpt omits the loop body advance and return. */
3445 static SgenThreadInfo*
3446 thread_info_lookup (pthread_t id)
3448 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3449 SgenThreadInfo *info;
3451 info = thread_table [hash];
3452 while (info && !pthread_equal (info->id, id)) {
/*
 * Snapshot the current thread's registers into cur_thread_regs and
 * record its current stack top, so a collection triggered from this
 * point scans the right conservative range. Called before any
 * operation that may collect.
 */
3459 update_current_thread_stack (void *start)
3461 void *ptr = cur_thread_regs;
3462 SgenThreadInfo *info = thread_info_lookup (pthread_self ());
3463 info->stack_start = align_pointer (&ptr);
3464 ARCH_STORE_REGS (ptr);
/* Human-readable name for the suspend/restart signals (debug output only).
 * NOTE(review): the returned string literals are elided in this excerpt. */
3468 signal_desc (int signum)
3470 if (signum == suspend_signal_num)
3472 if (signum == restart_signal_num)
3477 /* LOCKING: assumes the GC lock is held */
/*
 * Send @signum (suspend or restart) to every registered thread except
 * the caller, then wait on suspend_ack_semaphore for one ack per signal
 * delivered. NOTE(review): excerpt omits count init/increment and the
 * return; visible code kept byte-identical.
 */
3479 thread_handshake (int signum)
3481 int count, i, result;
3482 SgenThreadInfo *info;
3483 pthread_t me = pthread_self ();
3486 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3487 for (info = thread_table [i]; info; info = info->next) {
3488 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
/* never signal ourselves: we are the coordinating thread */
3489 if (pthread_equal (info->id, me)) {
3490 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
3493 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
3495 result = pthread_kill (info->id, signum);
3497 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
3500 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
/* wait for each signalled thread to ack; retry on EINTR */
3506 for (i = 0; i < count; ++i) {
3507 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
3508 if (errno != EINTR) {
3509 g_error ("sem_wait ()");
3516 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
/*
 * Signal handler run in each mutator thread when the collector stops
 * the world: publish this thread's remset and stack top, ack via the
 * semaphore, then sleep in sigsuspend () until the restart signal
 * arrives, and ack again. errno is preserved across the handler.
 * NOTE(review): excerpt omits local declarations, errno restore and
 * closing braces; visible code kept byte-identical.
 */
3518 suspend_handler (int sig)
3520 SgenThreadInfo *info;
3523 int old_errno = errno;
3525 id = pthread_self ();
3526 info = thread_info_lookup (id);
3527 stop_count = global_stop_count;
3528 /* duplicate signal */
/* disabled duplicate-suppression path (0 && ...) */
3529 if (0 && info->stop_count == stop_count) {
3533 /* update the remset info in the thread data structure */
3534 info->remset = remembered_set;
3536 * this includes the register values that the kernel put on the stack.
3537 * Write arch-specific code to only push integer regs and a more accurate
/* a handler-frame local address doubles as the stack top: the signal
 * frame above it holds the interrupted registers */
3540 info->stack_start = align_pointer (&id);
3542 /* notify the waiting thread */
3543 sem_post (&suspend_ack_semaphore);
3544 info->stop_count = stop_count;
3546 /* wait until we receive the restart signal */
3549 sigsuspend (&suspend_signal_mask);
3550 } while (info->signal != restart_signal_num);
3552 /* notify the waiting thread */
3553 sem_post (&suspend_ack_semaphore);
/*
 * Handler for the restart signal: just records that it arrived so the
 * sigsuspend () loop in suspend_handler () can exit. errno is preserved.
 * NOTE(review): excerpt omits the errno restore line.
 */
3559 restart_handler (int sig)
3561 SgenThreadInfo *info;
3562 int old_errno = errno;
3564 info = thread_info_lookup (pthread_self ());
3565 info->signal = restart_signal_num;
/* timestamp of the last world stop, used to compute pause times */
3570 static TV_DECLARE (stop_world_time);
3571 static unsigned long max_pause_usec = 0;
3573 /* LOCKING: assumes the GC lock is held */
/* NOTE(review): the signature line is not present in this excerpt —
 * presumably stop_world (); it bumps the stop epoch and suspends all
 * other threads via thread_handshake (). Confirm against full source. */
3579 global_stop_count++;
3580 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (pthread_self ()), (gpointer)pthread_self ()));
3581 TV_GETTIME (stop_world_time);
3582 count = thread_handshake (suspend_signal_num);
3583 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
3587 /* LOCKING: assumes the GC lock is held */
/*
 * Resume all suspended threads via the restart signal and update the
 * maximum observed pause time. NOTE(review): excerpt omits local
 * declarations and the return; visible code kept byte-identical.
 */
3589 restart_world (void)
3592 TV_DECLARE (end_sw);
3595 count = thread_handshake (restart_signal_num);
3596 TV_GETTIME (end_sw);
/* pause = time since stop_world () grabbed stop_world_time */
3597 usec = TV_ELAPSED (stop_world_time, end_sw);
3598 max_pause_usec = MAX (usec, max_pause_usec);
3599 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
3604 * Identify objects pinned in a thread stack and its registers.
/*
 * Conservatively scan every registered thread's stack range, plus the
 * saved register snapshot of the current thread, pinning anything that
 * looks like a pointer into [start_nursery, end_nursery).
 * NOTE(review): excerpt omits the dead-thread test guarding the skip
 * branch; visible code kept byte-identical.
 */
3607 pin_thread_data (void *start_nursery, void *end_nursery)
3610 SgenThreadInfo *info;
3612 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3613 for (info = thread_table [i]; info; info = info->next) {
3615 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3618 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3619 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
/* registers of the GC thread itself, captured by
 * update_current_thread_stack () */
3622 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n"));
3623 conservatively_pin_objects_from (cur_thread_regs, cur_thread_regs + ARCH_NUM_REGS, start_nursery, end_nursery);
/*
 * Debug helper: report which thread-stack word points into the object
 * [obj, obj+size), explaining why it got pinned.
 * NOTE(review): excerpt omits the start++ loop advance; visible code
 * kept byte-identical.
 */
3627 find_pinning_ref_from_thread (char *obj, size_t size)
3630 SgenThreadInfo *info;
3631 char *endobj = obj + size;
3633 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3634 for (info = thread_table [i]; info; info = info->next) {
3635 char **start = (char**)info->stack_start;
/* scan every word of the thread's stack for an interior pointer */
3638 while (start < (char**)info->stack_end) {
3639 if (*start >= obj && *start < endobj) {
3640 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, info->id, start, info->stack_start, info->stack_end));
3646 /* FIXME: check register */
3649 /* return TRUE if ptr points inside the managed heap */
/* Fast range test against the global [lowest, highest) heap bounds;
 * may report false positives for unmanaged memory inside the range
 * (see FIXME). NOTE(review): excerpt omits the return statements. */
3651 ptr_in_heap (void* ptr)
3653 mword p = (mword)ptr;
3654 if (p < lowest_heap_address || p >= highest_heap_address)
3656 /* FIXME: more checks */
/*
 * Process one remembered-set entry at @p. The low REMSET_TYPE_MASK bits
 * of *p encode the entry kind: a single location, a range (followed by
 * a count word), or a whole object to re-scan. Non-global remsets
 * re-register locations that still point into the nursery after the
 * copy onto the global remset.
 * NOTE(review): excerpt omits local declarations (ptr, count), case
 * labels for RANGE/OBJECT, the ptr assignment for REMSET_LOCATION and
 * the returned advance values; visible code kept byte-identical.
 */
3661 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
3666 /* FIXME: exclude stack locations */
3667 switch ((*p) & REMSET_TYPE_MASK) {
3668 case REMSET_LOCATION:
/* only copy when the slot is outside the nursery but in the heap */
3670 if ((ptr < start_nursery || ptr >= end_nursery) && ptr_in_heap (ptr)) {
3671 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3672 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
3673 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3674 add_to_global_remset (ptr);
3676 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
/* range entry: strip the tag bits to recover the base slot address */
3680 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
3681 if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr))
3684 while (count-- > 0) {
3685 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3686 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
3687 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3688 add_to_global_remset (ptr);
/* object entry: rescan the whole object for nursery references */
3693 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
3694 if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr))
3696 scan_object (*ptr, start_nursery, end_nursery);
3699 g_assert_not_reached ();
/*
 * Minor-collection root scan of remembered sets: process the global
 * remset, then every thread's remset chain. Per-thread remsets are
 * reset (store_next rewound) after processing, and all but the head
 * buffer of each chain are freed.
 * NOTE(review): excerpt omits local declarations and closing braces;
 * visible code kept byte-identical.
 */
3705 scan_from_remsets (void *start_nursery, void *end_nursery)
3708 SgenThreadInfo *info;
3709 RememberedSet *remset, *next;
3712 /* the global one */
3713 for (remset = global_remset; remset; remset = remset->next) {
3714 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %d\n", remset->data, remset->store_next, remset->store_next - remset->data));
/* handle_remset returns the position after the consumed entry */
3715 for (p = remset->data; p < remset->store_next;) {
3716 p = handle_remset (p, start_nursery, end_nursery, TRUE);
3719 /* the per-thread ones */
3720 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3721 for (info = thread_table [i]; info; info = info->next) {
3722 for (remset = info->remset; remset; remset = next) {
3723 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %d\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
3724 for (p = remset->data; p < remset->store_next;) {
3725 p = handle_remset (p, start_nursery, end_nursery, FALSE);
/* rewind this buffer; keep only the head buffer of the chain */
3727 remset->store_next = remset->data;
3728 next = remset->next;
3729 remset->next = NULL;
3730 if (remset != info->remset) {
3731 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
3732 free_internal_mem (remset);
3740 * Clear the info in the remembered sets: we're doing a major collection, so
3741 * the per-thread ones are not needed and the global ones will be reconstructed
/*
 * Rewind every remembered-set buffer (global and per-thread) and free
 * all but the head buffer of each chain. Same buffer-recycling pattern
 * as the tail of scan_from_remsets (), without processing entries.
 * NOTE(review): excerpt omits local declarations and closing braces.
 */
3745 clear_remsets (void)
3748 SgenThreadInfo *info;
3749 RememberedSet *remset, *next;
3751 /* the global list */
3752 for (remset = global_remset; remset; remset = next) {
3753 remset->store_next = remset->data;
3754 next = remset->next;
3755 remset->next = NULL;
3756 if (remset != global_remset) {
3757 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
3758 free_internal_mem (remset);
3761 /* the per-thread ones */
3762 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3763 for (info = thread_table [i]; info; info = info->next) {
3764 for (remset = info->remset; remset; remset = next) {
3765 remset->store_next = remset->data;
3766 next = remset->next;
3767 remset->next = NULL;
3768 if (remset != info->remset) {
3769 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
3770 free_internal_mem (remset);
3777 /* LOCKING: assumes the GC lock is held */
/*
 * Create and initialize the SgenThreadInfo for the calling thread:
 * determine the stack bounds (pthread attrs where available, otherwise
 * page-align the caller-provided @addr assuming a downward-growing
 * stack), insert into thread_table, and give the thread its first
 * remembered-set buffer (also published in the global remembered_set).
 * NOTE(review): excerpt omits hash declaration, signal init and the
 * return; visible code kept byte-identical.
 */
3778 static SgenThreadInfo*
3779 gc_register_current_thread (void *addr)
3782 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
3785 info->id = pthread_self ();
/* -1 so the first suspend is never mistaken for a duplicate */
3786 info->stop_count = -1;
3789 info->stack_start = NULL;
3791 /* try to get it with attributes first */
3792 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
3796 pthread_attr_t attr;
3797 pthread_getattr_np (pthread_self (), &attr);
3798 pthread_attr_getstack (&attr, &sstart, &size);
3799 info->stack_end = (char*)sstart + size;
3801 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
3802 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
3805 /* FIXME: we assume the stack grows down */
/* round the caller's address up to a 4K page boundary as the stack base */
3806 gsize stack_bottom = (gsize)addr;
3807 stack_bottom += 4095;
3808 stack_bottom &= ~4095;
3809 info->stack_end = (char*)stack_bottom;
3813 /* hash into the table */
3814 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
3815 info->next = thread_table [hash];
3816 thread_table [hash] = info;
3818 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
3819 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
/*
 * Remove the calling thread's SgenThreadInfo from thread_table.
 * NOTE(review): excerpt omits local declarations, the loop advance and
 * the free of the info struct; the thread's remsets are not yet
 * transferred anywhere (see FIXME).
 */
3824 unregister_current_thread (void)
3827 SgenThreadInfo *prev = NULL;
3829 pthread_t id = pthread_self ();
3831 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3832 p = thread_table [hash];
3834 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
/* walk the bucket to find our entry, then unlink it */
3835 while (!pthread_equal (p->id, id)) {
3840 thread_table [hash] = p->next;
3842 prev->next = p->next;
3844 /* FIXME: transfer remsets if any */
/*
 * Trampoline run as the entry point of every thread created through
 * mono_gc_pthread_create (): registers the thread with the GC, signals
 * the creator (which waits on start_info->registered), runs the user's
 * start routine, then unregisters on the way out.
 * NOTE(review): excerpt omits locking, result declaration and return.
 */
3849 gc_start_thread (void *arg)
3851 SgenThreadStartInfo *start_info = arg;
3852 SgenThreadInfo* info;
/* copy out what we need before the creator frees start_info */
3853 void *t_arg = start_info->arg;
3854 void *(*start_func) (void*) = start_info->start_routine;
3858 info = gc_register_current_thread (&result);
3860 sem_post (&(start_info->registered));
3861 result = start_func (t_arg);
3863 unregister_current_thread ();
/*
 * Wrapper for pthread_create () that routes the new thread through
 * gc_start_thread () so it registers with the GC before user code runs.
 * The creator blocks on the semaphore until registration completes
 * (retrying sem_wait on interruption), then tears down the start info.
 * NOTE(review): excerpt omits malloc failure handling, the free of
 * start_info and the return of result.
 */
3870 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
3872 SgenThreadStartInfo *start_info;
3875 start_info = malloc (sizeof (SgenThreadStartInfo));
3878 sem_init (&(start_info->registered), 0, 0);
3879 start_info->arg = arg;
3880 start_info->start_routine = start_routine;
3882 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
3884 while (sem_wait (&(start_info->registered)) != 0) {
3885 /*if (EINTR != errno) ABORT("sem_wait failed"); */
3888 sem_destroy (&(start_info->registered));
/* Thin pass-through wrappers kept for interface symmetry with
 * mono_gc_pthread_create (); no GC bookkeeping is needed on join/detach
 * (unregistration happens in gc_start_thread ()). */
3894 mono_gc_pthread_join (pthread_t thread, void **retval)
3896 return pthread_join (thread, retval);
3900 mono_gc_pthread_detach (pthread_t thread)
3902 return pthread_detach (thread);
/*
 * Ensure the calling thread is known to the GC, registering it with
 * @baseptr as a stack-bound hint if it was not registered already.
 * Returns TRUE on success (info non-NULL).
 * NOTE(review): excerpt omits locking and the not-already-registered
 * check guarding the registration call.
 */
3906 mono_gc_register_thread (void *baseptr)
3908 SgenThreadInfo *info;
3910 info = thread_info_lookup (pthread_self ());
3912 info = gc_register_current_thread (baseptr);
3914 return info != NULL;
3918 * ######################################################################
3919 * ######## Write barriers
3920 * ######################################################################
/*
 * Allocate a remembered-set buffer with room for @size entries; @id is
 * used only for the debug message. store_next is the bump pointer,
 * end_set the exclusive limit. NOTE(review): excerpt omits the
 * res->next init and the return of res.
 */
3923 static RememberedSet*
3924 alloc_remset (int size, gpointer id) {
3925 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
3926 res->store_next = res->data;
3927 res->end_set = res->data + size;
3929 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
3934 * Note: the write barriers first do the needed GC work and then do the actual store:
3935 * this way the value is visible to the conservative GC scan after the write barrier
3936 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
3937 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
3938 * happens when we need to record which pointers contain references to the new generation.
3939 * The write barrier will be executed, but the pointer is still not stored.
/*
 * Write barrier for an object field store: stores into the nursery need
 * no recording; otherwise the field address is appended to the current
 * remembered set, growing the remset chain when the buffer is full.
 * NOTE(review): excerpt omits the RememberedSet *rs declaration line
 * and returns; visible code kept byte-identical.
 */
3942 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
/* nursery targets are scanned wholesale at minor collection */
3945 if (field_ptr >= nursery_start && field_ptr < nursery_real_end) {
3946 *(void**)field_ptr = value;
3949 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
3950 rs = remembered_set;
3951 if (rs->store_next < rs->end_set) {
/* record first, store second (see note above) */
3952 *(rs->store_next++) = (mword)field_ptr;
3953 *(void**)field_ptr = value;
/* buffer full: chain a new same-sized buffer and make it current */
3956 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
3957 rs->next = remembered_set;
3958 remembered_set = rs;
3959 thread_info_lookup (pthread_self())->remset = rs;
3960 *(rs->store_next++) = (mword)field_ptr;
3961 *(void**)field_ptr = value;
/*
 * Write barrier for storing a reference into an array slot; identical
 * recording logic to mono_gc_wbarrier_set_field () with the slot
 * address as the remembered location.
 * NOTE(review): excerpt omits returns/closing braces.
 */
3965 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
3967 RememberedSet *rs = remembered_set;
/* nursery slots need no remset entry */
3968 if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end) {
3969 *(void**)slot_ptr = value;
3972 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
3973 if (rs->store_next < rs->end_set) {
3974 *(rs->store_next++) = (mword)slot_ptr;
3975 *(void**)slot_ptr = value;
/* buffer full: chain a new buffer and record there */
3978 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
3979 rs->next = remembered_set;
3980 remembered_set = rs;
3981 thread_info_lookup (pthread_self())->remset = rs;
3982 *(rs->store_next++) = (mword)slot_ptr;
3983 *(void**)slot_ptr = value;
/*
 * Write barrier for copying @count reference slots starting at
 * @slot_ptr: records a two-word REMSET_RANGE entry (tagged address +
 * count). Note this barrier only records; the actual copy is done by
 * the caller. NOTE(review): excerpt omits returns/closing braces.
 */
3987 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
3989 RememberedSet *rs = remembered_set;
3990 if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end)
3992 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
/* need two free words: the tagged pointer and the count */
3993 if (rs->store_next + 1 < rs->end_set) {
3994 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
3995 *(rs->store_next++) = count;
3998 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
3999 rs->next = remembered_set;
4000 remembered_set = rs;
4001 thread_info_lookup (pthread_self())->remset = rs;
4002 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4003 *(rs->store_next++) = count;
4007 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4009 RememberedSet *rs = remembered_set;
4010 if (ptr >= nursery_start && ptr < nursery_real_end) {
4011 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4012 *(void**)ptr = value;
4015 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", ptr));
4016 /* FIXME: ensure it is on the heap */
4017 if (rs->store_next < rs->end_set) {
4018 *(rs->store_next++) = (mword)ptr;
4019 *(void**)ptr = value;
4022 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4023 rs->next = remembered_set;
4024 remembered_set = rs;
4025 thread_info_lookup (pthread_self())->remset = rs;
4026 *(rs->store_next++) = (mword)ptr;
4027 *(void**)ptr = value;
/*
 * Write barrier for copying @count value-type instances of class @klass
 * from @src to @dest.
 * NOTE(review): this span is heavily truncated in the extraction — the
 * nursery fast-path body and the remset-recording tail are missing.
 * Reconstruct from the upstream file before editing.
 */
4031 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
/* destination in the nursery: presumably copies without recording a
 * remset entry — confirm against the missing body */
4033 if (dest >= nursery_start && dest < nursery_real_end) {
4036 DEBUG (1, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
4040 * mono_gc_wbarrier_object:
4042 * Write barrier to call when obj is the result of a clone or copy of an object.
4045 mono_gc_wbarrier_object (MonoObject* obj)
4047 RememberedSet *rs = remembered_set;
4048 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4049 if (rs->store_next < rs->end_set) {
4050 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4053 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4054 rs->next = remembered_set;
4055 remembered_set = rs;
4056 thread_info_lookup (pthread_self())->remset = rs;
4057 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4061 * ######################################################################
4062 * ######## Other mono public interface functions.
4063 * ######################################################################
/*
 * Force a garbage collection of the given generation: 0 triggers a
 * nursery (minor) collection, anything else a major collection.
 * NOTE(review): lines are missing from this span (locking/stop-world
 * around the collection, the else branch) — consult upstream before
 * editing.
 */
4067 mono_gc_collect (int generation)
/* passes the address of a local so the current stack top is recorded —
 * presumably for conservative stack scanning; confirm */
4070 update_current_thread_stack (&generation);
4072 if (generation == 0) {
4073 collect_nursery (0);
4075 major_collection ();
/* Returns the index of the oldest generation. Body truncated in this
 * extraction — presumably returns 1 (nursery + major); confirm. */
4082 mono_gc_max_generation (void)
4088 mono_gc_collection_count (int generation)
4090 if (generation == 0)
4091 return num_minor_gcs;
4092 return num_major_gcs;
/*
 * Returns an approximation of the heap bytes currently in use: the large
 * object space usage plus, for every heap section, the extent of its
 * bump-pointer allocation (next_data - data).
 * NOTE(review): lines are missing from this span (declaration of `tot`,
 * locking, final return) — consult upstream before editing.
 */
4096 mono_gc_get_used_size (void)
4099 GCMemSection *section;
4101 tot = los_memory_usage;
4102 for (section = section_list; section; section = section->next) {
4103 /* this is approximate... */
4104 tot += section->next_data - section->data;
4106 /* FIXME: account for pinned objects */
/* Total heap size accessor. Body truncated in this extraction. */
4112 mono_gc_get_heap_size (void)
/* Disable GC collections (body truncated). */
4118 mono_gc_disable (void)
/* Re-enable GC collections (body truncated). */
4126 mono_gc_enable (void)
/* Liveness query for object `o` (body truncated). */
4134 mono_object_is_alive (MonoObject* o)
/* Returns the generation an object belongs to: generation 0 when its
 * address falls inside the nursery; the return statements are truncated
 * here (presumably 0 / oldest generation — confirm). */
4140 mono_gc_get_generation (MonoObject *obj)
4142 if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end)
/* Enable GC profiler/event callbacks (body truncated). */
4148 mono_gc_enable_events (void)
/* Registers a weak link: *link_addr will track `obj` and be cleared when
 * it dies, via the disappearing-link machinery. */
4153 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
4155 mono_gc_register_disappearing_link (obj, link_addr);
/* Removes a weak link by registering a NULL target for the address. */
4160 mono_gc_weak_link_remove (void **link_addr)
4162 mono_gc_register_disappearing_link (NULL, link_addr);
/* Reads the current target of a weak link (body truncated). */
4167 mono_gc_weak_link_get (void **link_addr)
/*
 * Builds a root descriptor from a pointer bitmap.  When all @numbits
 * bits fit in a single word alongside the ROOT_DESC type tag, the bitmap
 * is encoded inline (only bitmap[0] is consulted — safe because of the
 * guard); otherwise the root falls back to conservative scanning.
 * NOTE(review): the return statements are truncated in this extraction —
 * confirm the fallback return value against upstream.
 */
4173 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
4175 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
4176 mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT);
4179 /* conservative scanning */
4180 DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits));
/*
 * Allocates @size bytes of zeroed, non-moving, GC-visible memory:
 * calloc's a block and registers it as a GC root with descriptor @descr.
 * NOTE(review): the lines between the calloc and the register_root call
 * are missing here — a NULL check on `res` may live there; the failure
 * branch of register_root is also truncated (presumably frees `res` and
 * returns NULL). Confirm against upstream.
 */
4185 mono_gc_alloc_fixed (size_t size, void *descr)
4187 /* FIXME: do a single allocation */
4188 void *res = calloc (1, size);
4191 if (!mono_gc_register_root (res, size, descr)) {
/*
 * Counterpart of mono_gc_alloc_fixed: deregisters the root at @addr.
 * NOTE(review): the rest of the body is truncated — presumably it also
 * free()s the block; confirm against upstream.
 */
4199 mono_gc_free_fixed (void* addr)
4201 mono_gc_deregister_root (addr);
/*
 * Returns whether the calling thread is registered with the GC, by
 * looking its pthread id up in the GC's thread-info table.
 * NOTE(review): surrounding lines (declaration of `result`, any locking
 * around the lookup, the return) are truncated here.
 */
4206 mono_gc_is_gc_thread (void)
4210 result = thread_info_lookup (pthread_self ()) != NULL;
/*
 * One-time GC initialization: reads MONO_GC_DEBUG, installs the
 * suspend/restart signal handlers used to stop the world, sets up the
 * suspend-ack semaphore and signal mask, allocates the global remembered
 * set, and registers the calling thread with the GC.
 * NOTE(review): many lines are missing from this span (locking, the
 * early-return body for the already-initialized case, the env-string
 * filename parsing) — consult upstream before editing.
 */
4216 mono_gc_base_init (void)
4219 struct sigaction sinfo;
/* already initialized: the (truncated) body presumably unlocks and returns */
4222 if (gc_initialized) {
4226 gc_initialized = TRUE;
4227 pagesize = mono_pagesize ();
/* debug output defaults to stderr unless a file is configured below */
4228 gc_debug_file = stderr;
4229 /* format: MONO_GC_DEBUG=l[,filename] where l is a debug level 0-9 */
4230 if ((env = getenv ("MONO_GC_DEBUG"))) {
4231 if (env [0] >= '0' && env [0] <= '9') {
/* atoi is tolerable for a single-digit debug env var; strtol would
 * allow error checking if this ever grows */
4232 gc_debug_level = atoi (env);
/* log file name is "<name>.<pid>"; NOTE(review): whether `rf` is freed
 * is on a truncated line — check for a leak upstream */
4238 char *rf = g_strdup_printf ("%s.%d", env, getpid ());
4239 gc_debug_file = fopen (rf, "wb");
/* fopen failure (condition line truncated) falls back to stderr */
4241 gc_debug_file = stderr;
/* semaphore the suspend handler posts to acknowledge each stop */
4246 sem_init (&suspend_ack_semaphore, 0, 0);
/* block all signals while the suspend/restart handlers run */
4248 sigfillset (&sinfo.sa_mask);
4249 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
/* NOTE(review): SA_SIGINFO is set but the handler is assigned through
 * sa_handler — POSIX requires sa_sigaction when SA_SIGINFO is used, and
 * the handler will be invoked with the three-argument signature. Same
 * applies to restart_handler below. Verify handler signatures. */
4250 sinfo.sa_handler = suspend_handler;
4251 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
4252 g_error ("failed sigaction");
4255 sinfo.sa_handler = restart_handler;
4256 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
4257 g_error ("failed sigaction");
/* suspended threads sleep with everything blocked except the restart
 * signal, which wakes them */
4260 sigfillset (&suspend_signal_mask);
4261 sigdelset (&suspend_signal_mask, restart_signal_num);
/* remset for stores performed outside managed threads */
4263 global_remset = alloc_remset (1024, NULL);
4264 global_remset->next = NULL;
/* register the initializing thread itself with the GC */
4267 mono_gc_register_thread (&sinfo);
4270 #endif /* HAVE_SGEN_GC */