2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright (C) 2005-2006 Novell, Inc
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation always provides zeroed memory; having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list is part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
130 #include <semaphore.h>
134 #include <sys/types.h>
135 #include <sys/stat.h>
136 #include <sys/mman.h>
137 #include <sys/time.h>
140 #include "metadata/metadata-internals.h"
141 #include "metadata/class-internals.h"
142 #include "metadata/gc-internal.h"
143 #include "metadata/object-internals.h"
144 #include "metadata/threads.h"
145 #include "metadata/sgen-gc.h"
146 #include "metadata/mono-gc.h"
147 #include "utils/mono-mmap.h"
151 * ######################################################################
152 * ######## Types and constants used by the GC.
153 * ######################################################################
155 #if SIZEOF_VOID_P == 4
156 typedef guint32 mword;
158 typedef guint64 mword;
161 static int gc_initialized = 0;
162 static int gc_debug_level = 0;
163 static FILE* gc_debug_file;
166 mono_gc_flush_info (void)
168 fflush (gc_debug_file);
/* Compile-time cap on debug verbosity: DEBUG() calls with a higher level
 * are removed entirely by the compiler, regardless of the runtime setting. */
171 #define MAX_DEBUG_LEVEL 9
/* Execute statement `a` only when `level` passes both the compile-time cap
 * and the runtime gc_debug_level; do/while(0) keeps the macro statement-safe. */
172 #define DEBUG(level,a) do {if ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level) a;} while (0)
/* Wall-clock timing helpers used to measure collection pauses. */
#define TV_DECLARE(name) struct timeval name
#define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
/* Elapsed time between two timevals, truncated to int microseconds.
 * All uses of the macro arguments are parenthesized (the original left the
 * tv_usec terms bare, so arguments like *tv_ptr mis-expanded), and the whole
 * expansion is parenthesized so it composes safely in larger expressions. */
#define TV_ELAPSED(start,end) ((int)((((end).tv_sec - (start).tv_sec) * 1000000) + (end).tv_usec - (start).tv_usec))
/* Number of bits in a GC word (mword): 32 or 64 depending on pointer size. */
178 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
188 /* each request from the OS ends up in a GCMemSection */
189 typedef struct _GCMemSection GCMemSection;
190 struct _GCMemSection {
194 /* pointer where more data could be allocated if it fits */
198 * scan starts is an array of pointers to objects equally spaced in the allocation area
199 * They let us quickly find pinned objects from pinning pointers.
202 /* in major collections indexes in the pin_queue for objects that pin this section */
205 unsigned short num_scan_start;
209 /* large object space struct: 64+ KB */
210 /* we could make this limit much smaller to avoid memcpy copy
211 * and potentially have more room in the GC descriptor: need to measure
212 * This also means that such small OS objects will need to be
213 * allocated in a different way (using pinned chunks).
214 * We may want to put large but smaller than 64k objects in the fixed space
215 * when we move the object from one generation to another (to limit the
216 * pig in the snake effect).
217 * Note: it may be worth to have an optimized copy function, since we can
218 * assume that objects are aligned and have a multiple of 8 size.
219 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
220 * true if MONO_ZERO_LEN_ARRAY is nonzero.
222 typedef struct _LOSObject LOSObject;
225 mword size; /* this is the object size */
226 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
228 char data [MONO_ZERO_LEN_ARRAY];
231 /* Pinned objects are allocated in the LOS space if bigger than half a page
232 * or from freelists otherwise. We assume that pinned objects are relatively few
233 * and they have a slow dying speed (like interned strings, thread objects).
234 * As such they will be collected only at major collections.
235 * free lists are not global: when we need memory we allocate a PinnedChunk.
236 * Each pinned chunk is made of several pages, the first of which is used
237 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
238 * includes the freelists vectors and info about the object size of each page
239 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
240 * a size is assigned to it, the page is divided in the proper chunks and each
241 * chunk is added to the freelist. To not waste space, the remaining space in the
242 * first page is used as objects of size 16 or 32 (need to measure which are more
244 * We use this same structure to allocate memory used internally by the GC, so
245 * we never use malloc/free if we need to alloc during collection: the world is stopped
246 * and malloc/free will deadlock.
247 * When we want to iterate over pinned objects, we just scan a page at a time
248 * linearly according to the size of objects in the page: the next pointer used to link
249 * the items in the freelist uses the same word as the vtable. Since we keep freelists
250 * for each pinned chunk, if the word points outside the pinned chunk it means
252 * We could avoid this expensive scanning in creative ways. We could have a policy
253 * of putting in the pinned space only objects we know about that have no struct fields
254 * with references and we can easily use even an expensive write barrier for them,
255 * since pointer writes on such objects should be rare.
256 * The best compromise is to just alloc interned strings and System.MonoType in them.
257 * It would be nice to allocate MonoThread in it, too: must check that we properly
258 * use write barriers so we don't have to do any expensive scanning of the whole pinned
259 * chunk list during minor collections. We can avoid it now because we alloc in it only
260 * reference-free objects.
/* Size class used for the leftover space in the first (bookkeeping) page of a chunk. */
262 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
/* Largest size served from the pinned-chunk freelists (half a pinned page);
 * bigger pinned objects are allocated in the LOS instead (see comment above). */
263 #define MAX_FREELIST_SIZE 2048
/* Fixed page size used to subdivide pinned chunks; independent of the OS page size. */
264 #define PINNED_PAGE_SIZE (4096)
/* Minimum size of a newly allocated pinned chunk: 8 pinned pages. */
265 #define PINNED_CHUNK_MIN_SIZE (4096*8)
266 typedef struct _PinnedChunk PinnedChunk;
267 struct _PinnedChunk {
270 int *page_sizes; /* a 0 means the page is still unused */
273 void *data [1]; /* page sizes and free lists are stored here */
277 * The young generation is divided into fragments. This is because
278 * we can hand one fragment to a thread for lock-less fast alloc and
279 * because the young generation ends up fragmented anyway by pinned objects.
280 * Once a collection is done, a list of fragments is created. When doing
281 * thread local alloc we use smallish nurseries so we allow new threads to
282 * allocate memory from gen0 without triggering a collection. Threads that
283 * are found to allocate lots of memory are given bigger fragments. This
284 * should make the finalizer thread use little nursery memory after a while.
285 * We should start assigning threads very small fragments: if there are many
286 * threads the nursery will be full of reserved space that the threads may not
287 * use at all, slowing down allocation speed.
289 typedef struct _Fragment Fragment;
293 char *fragment_start;
294 char *fragment_limit; /* the current soft limit for allocation */
298 /* the runtime can register areas of memory as roots: we keep two lists of roots,
299 * a pinned root set for conservatively scanned roots and a normal one for
300 * precisely scanned roots (currently implemented as a single list).
302 typedef struct _RootRecord RootRecord;
310 /* for use with write barriers */
311 typedef struct _RememberedSet RememberedSet;
312 struct _RememberedSet {
316 mword data [MONO_ZERO_LEN_ARRAY];
319 /* we have 4 possible values in the low 2 bits */
321 REMSET_LOCATION, /* just a pointer to the exact location */
322 REMSET_RANGE, /* range of pointer fields */
323 REMSET_OBJECT, /* mark all the object for scanning */
324 REMSET_TYPE_MASK = 0x3
327 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
328 static RememberedSet *global_remset;
329 static int store_to_global_remset = 0;
331 /* FIXME: later choose a size that takes into account the RememberedSet struct
332 * and doesn't waste any alloc padding space.
334 #define DEFAULT_REMSET_SIZE 1024
335 static RememberedSet* alloc_remset (int size, gpointer id);
337 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
338 * no cast from a pointer to an integer
345 /* these bits are set in the object vtable: we could merge them since an object can be
346 * either pinned or forwarded but not both.
347 * We store them in the vtable slot because the bits are used in the sync block for
348 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
349 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
350 * would be an invalid combination for the monitor and hash code).
351 * The values are already shifted.
352 * The forwarding address is stored in the sync block.
/* Flag bit OR-ed into the object's vtable word when it has been forwarded
 * (the values are already shifted; see the comment above). */
354 #define FORWARDED_BIT 1
/* Mask covering the low vtable bits stolen for the forwarded/pinned flags. */
356 #define VTABLE_BITS_MASK 0x3
358 /* returns NULL if not forwarded, or the forwarded address */
359 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
360 /* set the forwarded address fw_addr for object obj */
361 #define forward_object(obj,fw_addr) do { \
362 ((mword*)(obj))[0] |= FORWARDED_BIT; \
363 ((mword*)(obj))[1] = (mword)(fw_addr); \
/* Test the pin flag stored in the low bits of the object's vtable word. */
366 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
367 #define pin_object(obj) do { \
368 ((mword*)(obj))[0] |= PINNED_BIT; \
370 #define unpin_object(obj) do { \
371 ((mword*)(obj))[0] &= ~PINNED_BIT; \
376 * Since we set bits in the vtable, use the macro to load it from the pointer to
377 * an object that is potentially pinned.
379 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
382 safe_name (void* obj)
384 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
385 return vt->klass->name;
389 safe_object_get_size (MonoObject* o)
391 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
392 if (klass == mono_defaults.string_class) {
393 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
394 } else if (klass->rank) {
395 MonoArray *array = (MonoArray*)o;
396 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
400 size += sizeof (MonoArrayBounds) * klass->rank;
404 /* from a created object: the class must be inited already */
405 return klass->instance_size;
410 * ######################################################################
411 * ######## Global data.
412 * ######################################################################
414 static LOCK_DECLARE (gc_mutex);
415 static int gc_disabled = 0;
416 static int num_minor_gcs = 0;
417 static int num_major_gcs = 0;
419 /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */
420 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
421 #define DEFAULT_NURSERY_SIZE (1024*512*2)
422 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
423 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
424 /* to quickly find the head of an object pinned by a conservative address
425 * we keep track of the objects allocated for each SCAN_START_SIZE memory
426 * chunk in the nursery or other memory sections. Larger values have less
427 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
429 #define SCAN_START_SIZE (4096*2)
430 /* the minimum size of a fragment that we consider useful for allocation */
431 #define FRAGMENT_MIN_SIZE (512)
432 /* This is a fixed value used for pinned chunks, not the system pagesize */
433 #define FREELIST_PAGESIZE 4096
435 static mword pagesize = 4096;
436 static mword nursery_size = DEFAULT_NURSERY_SIZE;
437 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
438 static mword max_section_size = DEFAULT_MAX_SECTION;
439 static int section_size_used = 0;
440 static int degraded_mode = 0;
442 static LOSObject *los_object_list = NULL;
443 static mword los_memory_usage = 0;
444 static mword los_num_objects = 0;
445 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
446 static mword total_alloc = 0;
447 /* use this to tune when to do a major/minor collection */
448 static mword memory_pressure = 0;
450 static GCMemSection *section_list = NULL;
451 static GCMemSection *nursery_section = NULL;
452 static mword lowest_heap_address = ~(mword)0;
453 static mword highest_heap_address = 0;
455 typedef struct _FinalizeEntry FinalizeEntry;
456 struct _FinalizeEntry {
459 void *data; /* can be a disappearing link or the data for the finalizer */
460 /* Note we could use just one pointer if we don't support multiple callbacks
461 * for finalizers and per-finalizer data and if we store the obj pointers
462 * in the link like libgc does
467 * The finalizable hash has the object as the key, the
468 * disappearing_link hash, has the link address as key.
470 static FinalizeEntry **finalizable_hash = NULL;
471 /* objects that are ready to be finalized */
472 static FinalizeEntry *fin_ready_list = NULL;
473 /* disappearing links use the same structure but a different list */
474 static FinalizeEntry **disappearing_link_hash = NULL;
475 static mword disappearing_link_hash_size = 0;
476 static mword finalizable_hash_size = 0;
478 static mword num_registered_finalizers = 0;
479 static mword num_ready_finalizers = 0;
480 static mword num_disappearing_links = 0;
481 static int no_finalize = 0;
483 /* keep each size a multiple of ALLOC_ALIGN */
484 /* on 64 bit systems 8 is likely completely unused. */
/* Size classes for the pinned-chunk freelists: each entry is a multiple of
 * ALLOC_ALIGN and the largest equals MAX_FREELIST_SIZE. */
485 static const int freelist_sizes [] = {
486 8, 16, 24, 32, 40, 48, 64, 80,
487 96, 128, 160, 192, 224, 256, 320, 384,
488 448, 512, 584, 680, 816, 1024, 1360, 2048};
489 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
491 static char* max_pinned_chunk_addr = NULL;
492 static char* min_pinned_chunk_addr = (char*)-1;
493 /* pinned_chunk_list is used for allocations of objects that are never moved */
494 static PinnedChunk *pinned_chunk_list = NULL;
495 /* internal_chunk_list is used for allocating structures needed by the GC */
496 static PinnedChunk *internal_chunk_list = NULL;
499 obj_is_from_pinned_alloc (char *p)
501 PinnedChunk *chunk = pinned_chunk_list;
502 for (; chunk; chunk = chunk->next) {
503 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
509 /* registered roots: the key to the hash is the root start address */
510 static RootRecord **roots_hash = NULL;
511 static int roots_hash_size = 0;
512 static mword roots_size = 0; /* amount of memory in the root set */
513 static mword num_roots_entries = 0;
516 * The current allocation cursors
517 * We allocate objects in the nursery.
518 * The nursery is the area between nursery_start and nursery_real_end.
519 * nursery_next is the pointer to the space where the next object will be allocated.
520 * nursery_temp_end is the pointer to the end of the temporary space reserved for
521 * the allocation: this allows us to allow allocations inside the fragments of the
522 * nursery (the empty holes between pinned objects) and it allows us to set the
523 * scan starts at reasonable intervals.
524 * nursery_next and nursery_temp_end will become per-thread vars to allow lock-free
526 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
527 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
528 * At the next allocation, the area of the nursery where objects can be present is
529 * between MIN(nursery_first_pinned_start, first_fragment_start) and
530 * MAX(nursery_last_pinned_end, nursery_temp_end)
532 static char *nursery_start = NULL;
533 static char *nursery_next = NULL;
534 static char *nursery_temp_end = NULL;
535 static char *nursery_real_end = NULL;
536 static char *nursery_frag_real_end = NULL;
537 static char *nursery_first_pinned_start = NULL;
538 static char *nursery_last_pinned_end = NULL;
540 /* fragments that are free and ready to be used for allocation */
541 static Fragment *nursery_fragments = NULL;
542 /* freelist of fragment structures */
543 static Fragment *fragment_freelist = NULL;
546 * used when moving the objects
547 * When the nursery is collected, objects are copied to to_space.
548 * The area between to_space and gray_objects is used as a stack
549 * of objects that need their fields checked for more references
551 * We should optimize somehow this mechanism to avoid rescanning
552 * ptr-free objects. The order is also probably not optimal: need to
553 * test cache misses and other graph traversal orders.
555 static char *to_space = NULL;
556 static char *gray_objects = NULL;
557 static char *to_space_end = NULL;
558 static GCMemSection *to_space_section = NULL;
560 /* objects bigger then this go into the large object space */
561 #define MAX_SMALL_OBJ_SIZE 0xffff
564 * ######################################################################
565 * ######## Macros and function declarations.
566 * ######################################################################
569 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
570 if ((mword)(low) < lowest_heap_address) \
571 lowest_heap_address = (mword)(low); \
572 if ((mword)(high) > highest_heap_address) \
573 highest_heap_address = (mword)(high); \
577 align_pointer (void *ptr)
579 mword p = (mword)ptr;
580 p += sizeof (gpointer) - 1;
581 p &= ~ (sizeof (gpointer) - 1);
585 /* forward declarations */
586 static void* get_internal_mem (size_t size);
587 static void free_internal_mem (void *addr);
588 static void* get_os_memory (size_t size, int activate);
589 static void free_os_memory (void *addr, size_t size);
590 static void report_internal_mem_usage (void);
592 static int stop_world (void);
593 static int restart_world (void);
594 static void pin_thread_data (void *start_nursery, void *end_nursery);
595 static void scan_from_remsets (void *start_nursery, void *end_nursery);
596 static void find_pinning_ref_from_thread (char *obj, size_t size);
597 static void update_current_thread_stack (void *start);
598 static GCMemSection* alloc_section (size_t size);
599 static void finalize_in_range (void **start, void **end);
600 static void null_link_in_range (void **start, void **end);
601 static gboolean search_fragment_for_size (size_t size);
602 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
603 static void clear_remsets (void);
604 static void sweep_pinned_objects (void);
605 static void free_large_object (LOSObject *obj);
606 static void free_mem_section (GCMemSection *section);
609 * ######################################################################
610 * ######## GC descriptors
611 * ######################################################################
612 * Used to quickly get the info the GC needs about an object: size and
613 * where the references are held.
615 /* objects are aligned to 8 bytes boundaries
616 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
617 * The low 3 bits define the type of the descriptor. The other bits
618 * depend on the type.
619 * As a general rule the 13 remaining low bits define the size, either
620 * of the whole object or of the elements in the arrays. While for objects
621 * the size is already in bytes, for arrays we need to shift, because
622 * array elements might be smaller than 8 bytes. In case of arrays, we
623 * use two bits to describe what the additional high bits represents,
624 * so the default behaviour can handle element sizes less than 2048 bytes.
625 * The high 16 bits, if 0 it means the object is pointer-free.
626 * This design should make it easy and fast to skip over ptr-free data.
627 * The first 4 types should cover >95% of the objects.
628 * Note that since the size of objects is limited to 64K, larger objects
629 * will be allocated in the large object heap.
630 * If we want 4-bytes alignment, we need to put vector and small bitmap
634 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
635 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
636 DESC_TYPE_STRING, /* nothing */
637 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
638 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
639 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
640 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
641 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
642 /* subtypes for arrays and vectors */
643 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
644 DESC_TYPE_V_REFS, /* all the array elements are refs */
645 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
646 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
/* Pointer-sized words occupied by the standard two-word object header. */
649 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
/* Low bits of a descriptor that encode its DESC_TYPE_*. */
650 #define LOW_TYPE_BITS 3
/* Small bitmaps live in the bits above the 16-bit byte-size field. */
651 #define SMALL_BITMAP_SHIFT 16
652 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
/* Vector/array descriptors: 2 subtype bits start at VECTOR_INFO_SHIFT,
 * the element size sits just above the low type bits. */
653 #define VECTOR_INFO_SHIFT 14
654 #define VECTOR_ELSIZE_SHIFT 3
655 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
/* Limits implied by the 16-bit size field and the 10-bit element-size field. */
656 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
657 #define SMALL_SIZE_MASK 0xfff8
658 #define MAX_ELEMENT_SIZE 0x3ff
659 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
/* Pre-shifted vector subtype values, ready to be OR-ed into a descriptor. */
660 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
661 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
662 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
663 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
665 #define ALLOC_ALIGN 8
668 /* Root bitmap descriptors are simpler: the lower two bits describe the type
669 * and we either have 30/62 bitmap bits or nibble-based run-length,
670 * or a complex descriptor
673 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
676 ROOT_DESC_LARGE_BITMAP,
677 ROOT_DESC_TYPE_MASK = 0x3,
678 ROOT_DESC_TYPE_SHIFT = 2,
681 static gsize* complex_descriptors = NULL;
682 static int complex_descriptors_size = 0;
683 static int complex_descriptors_next = 0;
686 alloc_complex_descriptor (gsize *bitmap, int numbits)
688 int nwords = numbits/GC_BITS_PER_WORD + 2;
693 res = complex_descriptors_next;
694 /* linear search, so we don't have duplicates with domain load/unload
695 * this should not be performance critical or we'd have bigger issues
696 * (the number and size of complex descriptors should be small).
698 for (i = 0; i < complex_descriptors_next; ) {
699 if (complex_descriptors [i] == nwords) {
701 for (j = 0; j < nwords - 1; ++j) {
702 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
712 i += complex_descriptors [i];
714 if (complex_descriptors_next + nwords > complex_descriptors_size) {
715 int new_size = complex_descriptors_size * 2 + nwords;
716 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
717 complex_descriptors_size = new_size;
719 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
720 complex_descriptors_next += nwords;
721 complex_descriptors [res] = nwords;
722 for (i = 0; i < nwords - 1; ++i) {
723 complex_descriptors [res + 1 + i] = bitmap [i];
724 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
731 * Descriptor builders.
734 mono_gc_make_descr_for_string (void)
736 return (void*) DESC_TYPE_STRING;
740 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
742 int first_set = -1, num_set = 0, last_set = -1, i;
744 size_t stored_size = obj_size;
745 stored_size += ALLOC_ALIGN - 1;
746 stored_size &= ~(ALLOC_ALIGN - 1);
747 for (i = 0; i < numbits; ++i) {
748 if (bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
755 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
756 /* check run-length encoding first: one byte offset, one byte number of pointers
757 * on 64 bit archs, we can have 3 runs, just one on 32.
758 * It may be better to use nibbles.
761 desc = DESC_TYPE_RUN_LENGTH | stored_size;
762 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %d\n", (void*)desc, stored_size));
764 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
765 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
766 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %d, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
769 /* we know the 2-word header is ptr-free */
770 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
771 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
772 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set));
776 /* we know the 2-word header is ptr-free */
777 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
778 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
779 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set));
782 /* it's a complex object ... */
783 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
/*
 * mono_gc_make_descr_for_array:
 * Build a GC descriptor word for an array/vector type.  @vector selects
 * DESC_TYPE_VECTOR vs DESC_TYPE_ARRAY; @elem_bitmap (@numbits bits) describes
 * the reference layout of one element; @elem_size is the element size in bytes.
 * NOTE(review): this listing has elided lines (the embedded numbering jumps),
 * so loop bodies and closing braces are not all visible here.
 */
787 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
789 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
791 int first_set = -1, num_set = 0, last_set = -1, i;
792 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
/* scan the element bitmap for the first/last set bits and count them */
793 for (i = 0; i < numbits; ++i) {
794 if (elem_bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
/* small element sizes are encoded directly in the descriptor word */
801 if (elem_size <= MAX_ELEMENT_SIZE) {
802 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
/* no set bits: the element holds no references */
804 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
806 /* Note: we also handle structs with just ref fields */
807 if (num_set * sizeof (gpointer) == elem_size) {
808 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((-1LL) << 16));
810 /* FIXME: try run-len first */
811 /* Note: we can't skip the object header here, because it's not present */
812 if (last_set <= SMALL_BITMAP_SIZE) {
813 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
816 /* it's an array of complex structs ... */
817 desc = DESC_TYPE_COMPLEX_ARR;
818 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
822 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
/* STRING_SIZE: allocation size of a MonoString (2 bytes per UTF-16 char + terminator),
 * rounded up to ALLOC_ALIGN.  Closing of the do/while is elided in this listing. */
823 #define STRING_SIZE(size,str) do { \
824 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
825 (size) += (ALLOC_ALIGN - 1); \
826 (size) &= ~(ALLOC_ALIGN - 1); \
/* OBJ_RUN_LEN_SIZE: for run-length descriptors the (aligned) object size
 * is stored in the low 16 bits of the descriptor (masking off the 3 tag bits) */
829 #define OBJ_RUN_LEN_SIZE(size,vt,obj) do { \
830 (size) = (vt)->desc & 0xfff8; \
/* OBJ_BITMAP_SIZE: small-bitmap descriptors use the same size encoding */
833 #define OBJ_BITMAP_SIZE(size,vt,obj) do { \
834 (size) = (vt)->desc & 0xfff8; \
837 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
/* PREFETCH is a no-op by default; the x86 prefetchnta version above is kept for reference */
838 #define PREFETCH(addr)
840 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
/* OBJ_RUN_LEN_FOREACH_PTR: apply HANDLE_PTR to every pointer slot of an object
 * described by a run-length descriptor.  The start offset (in words) lives in
 * bits 16-23 and the count in bits 24-31; a zero high half means no pointers. */
841 #define OBJ_RUN_LEN_FOREACH_PTR(vt,obj) do { \
842 if ((vt)->desc & 0xffff0000) { \
843 /* there are pointers */ \
844 void **_objptr_end; \
845 void **_objptr = (void**)(obj); \
846 _objptr += ((vt)->desc >> 16) & 0xff; \
847 _objptr_end = _objptr + (((vt)->desc >> 24) & 0xff); \
848 while (_objptr < _objptr_end) { \
849 HANDLE_PTR (_objptr, (obj)); \
855 /* a bitmap desc means that there are pointer references or we'd have
856 * chosen run-length, instead: add an assert to check.
/* OBJ_BITMAP_FOREACH_PTR: walk the slots selected by the small bitmap stored in
 * the high bits of the descriptor; the 2-word object header is known ptr-free
 * and is skipped.  The bit-test loop body is elided in this listing. */
858 #define OBJ_BITMAP_FOREACH_PTR(vt,obj) do { \
859 /* there are pointers */ \
860 void **_objptr = (void**)(obj); \
861 gsize _bmap = (vt)->desc >> 16; \
862 _objptr += OBJECT_HEADER_WORDS; \
865 HANDLE_PTR (_objptr, (obj)); \
/* OBJ_LARGE_BITMAP_FOREACH_PTR: same idea, but the bitmap occupies all the
 * descriptor bits above the low type tag (LOW_TYPE_BITS). */
872 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
873 /* there are pointers */ \
874 void **_objptr = (void**)(obj); \
875 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
876 _objptr += OBJECT_HEADER_WORDS; \
879 HANDLE_PTR (_objptr, (obj)); \
/* OBJ_COMPLEX_FOREACH_PTR: for objects whose layout doesn't fit an inline
 * descriptor.  The descriptor's high bits index into complex_descriptors,
 * where the first word is a count followed by bitmap words; each bitmap word
 * covers GC_BITS_PER_WORD slots starting at start_run. */
886 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
887 /* there are pointers */ \
888 void **_objptr = (void**)(obj); \
889 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
890 int bwords = (*bitmap_data) - 1; \
891 void **start_run = _objptr; \
894 MonoObject *myobj = (MonoObject*)obj; \
895 g_print ("found %d at %p (0x%x): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
897 while (bwords-- > 0) { \
898 gsize _bmap = *bitmap_data++; \
899 _objptr = start_run; \
900 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
903 HANDLE_PTR (_objptr, (obj)); \
/* advance to the slot range covered by the next bitmap word */ \
908 start_run += GC_BITS_PER_WORD; \
912 /* this one is untested */
/* OBJ_COMPLEX_ARR_FOREACH_PTR: arrays of complex structs — apply the shared
 * complex bitmap (mbitmap_data/mbwords) to every element of the MonoArray,
 * stepping e_start by the element size. */
913 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
914 /* there are pointers */ \
915 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
916 int mbwords = (*mbitmap_data++) - 1; \
917 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
918 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
919 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
921 MonoObject *myobj = (MonoObject*)start; \
922 g_print ("found %d at %p (0x%x): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
924 while (e_start < e_end) { \
925 void **_objptr = (void**)e_start; \
926 gsize *bitmap_data = mbitmap_data; \
927 unsigned int bwords = mbwords; \
928 while (bwords-- > 0) { \
929 gsize _bmap = *bitmap_data++; \
930 void **start_run = _objptr; \
931 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
934 HANDLE_PTR (_objptr, (obj)); \
939 _objptr = start_run + GC_BITS_PER_WORD; \
941 e_start += el_size; \
/* OBJ_VECTOR_FOREACH_PTR: dispatch on the vector element subtype stored in
 * descriptor bits 14-15 (REFS / RUN_LEN / BITMAP); PTRFREE vectors are
 * filtered out up front by the 0xffffc000 mask. */
945 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
946 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
947 if ((vt)->desc & 0xffffc000) { \
948 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
949 /* there are pointers */ \
950 int etype = (vt)->desc & 0xc000; \
951 if (etype == (DESC_TYPE_V_REFS << 14)) { \
952 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
953 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
954 /* Note: this code can handle also arrays of struct with only references in them */ \
955 while (p < end_refs) { \
956 HANDLE_PTR (p, (obj)); \
/* run-length element: offset (bits 16-23) and ref count (bits 24-31) per element */ \
959 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
960 int offset = ((vt)->desc >> 16) & 0xff; \
961 int num_refs = ((vt)->desc >> 24) & 0xff; \
962 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
963 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
964 while (e_start < e_end) { \
965 void **p = (void**)e_start; \
968 for (i = 0; i < num_refs; ++i) { \
969 HANDLE_PTR (p + i, (obj)); \
971 e_start += el_size; \
/* bitmap element: per-element bitmap in descriptor bits 16+ */ \
973 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
974 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
975 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
976 while (e_start < e_end) { \
977 void **p = (void**)e_start; \
978 gsize _bmap = (vt)->desc >> 16; \
979 /* Note: there is no object header here to skip */ \
982 HANDLE_PTR (p, (obj)); \
987 e_start += el_size; \
/* statistics collected by the checking HANDLE_PTR below (used by scan_area) */
993 static mword new_obj_references = 0;
994 static mword obj_references_checked = 0;
/* checking variant of HANDLE_PTR: counts references that point into the
 * nursery (nursery_start..nursery_next) without modifying anything */
997 #define HANDLE_PTR(ptr,obj) do { \
998 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
999 new_obj_references++; \
1000 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1002 obj_references_checked++; \
1007 * ######################################################################
1008 * ######## Detecting and removing garbage.
1009 * ######################################################################
1010 * This section of code deals with detecting the objects no longer in use
1011 * and reclaiming the memory.
/*
 * scan_area:
 * Debug/statistics walk over the heap range [start, end): for every object it
 * computes the object size from the GC descriptor type tag (low 3 bits of
 * vt->desc) and runs the checking HANDLE_PTR over its reference slots,
 * counting references into the nursery.  Zeroed holes are skipped a word at
 * a time.  Elided lines (per the jumping numbering) include the per-type
 * counters and the loop increments.
 */
1013 static void __attribute__((noinline))
1014 scan_area (char *start, char *end)
1019 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1020 new_obj_references = 0;
1021 obj_references_checked = 0;
1022 while (start < end) {
1023 if (!*(void**)start) {
1024 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1027 vt = (GCVTable*)LOAD_VTABLE (start);
1028 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1030 MonoObject *obj = (MonoObject*)start;
1031 g_print ("found at %p (0x%x): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
1033 type = vt->desc & 0x7;
1034 if (type == DESC_TYPE_STRING) {
1035 STRING_SIZE (skip_size, start);
1039 } else if (type == DESC_TYPE_RUN_LENGTH) {
1040 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1041 g_assert (skip_size);
1042 OBJ_RUN_LEN_FOREACH_PTR (vt,start);
1046 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1047 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1048 skip_size *= mono_array_length ((MonoArray*)start);
1049 skip_size += sizeof (MonoArray);
1050 skip_size += (ALLOC_ALIGN - 1);
1051 skip_size &= ~(ALLOC_ALIGN - 1);
1052 OBJ_VECTOR_FOREACH_PTR (vt, start);
1053 if (type == DESC_TYPE_ARRAY) {
1054 /* account for the bounds */
1059 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1060 OBJ_BITMAP_SIZE (skip_size, vt, start);
1061 g_assert (skip_size);
1062 OBJ_BITMAP_FOREACH_PTR (vt,start);
1066 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1067 skip_size = safe_object_get_size ((MonoObject*)start);
1068 skip_size += (ALLOC_ALIGN - 1);
1069 skip_size &= ~(ALLOC_ALIGN - 1);
1070 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1074 } else if (type == DESC_TYPE_COMPLEX) {
1075 /* this is a complex object */
1076 skip_size = safe_object_get_size ((MonoObject*)start);
1077 skip_size += (ALLOC_ALIGN - 1);
1078 skip_size &= ~(ALLOC_ALIGN - 1);
1079 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1083 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1084 /* this is an array of complex structs */
1085 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1086 skip_size *= mono_array_length ((MonoArray*)start);
1087 skip_size += sizeof (MonoArray);
1088 skip_size += (ALLOC_ALIGN - 1);
1089 skip_size &= ~(ALLOC_ALIGN - 1);
1090 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1091 if (type == DESC_TYPE_ARRAY) {
1092 /* account for the bounds */
1101 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1102 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1103 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
/*
 * scan_area_for_domain:
 * Walk the heap range [start, end) and memset-to-zero every object that
 * belongs to @domain (threads are skipped — see comment below).  Uses the
 * same descriptor-tag dispatch as scan_area to compute each object's size.
 * NOTE(review): the line setting `remove` is elided in this listing; it is
 * presumably derived from the domain check above — confirm against the full file.
 */
1106 static void __attribute__((noinline))
1107 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1112 while (start < end) {
1113 if (!*(void**)start) {
1114 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1117 vt = (GCVTable*)LOAD_VTABLE (start);
1118 /* handle threads someway (maybe insert the root domain vtable?) */
1119 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1120 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
1125 type = vt->desc & 0x7;
1126 if (type == DESC_TYPE_STRING) {
1127 STRING_SIZE (skip_size, start);
1128 if (remove) memset (start, 0, skip_size);
1131 } else if (type == DESC_TYPE_RUN_LENGTH) {
1132 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1133 g_assert (skip_size);
1134 if (remove) memset (start, 0, skip_size);
1137 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1138 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1139 skip_size *= mono_array_length ((MonoArray*)start);
1140 skip_size += sizeof (MonoArray);
1141 skip_size += (ALLOC_ALIGN - 1);
1142 skip_size &= ~(ALLOC_ALIGN - 1);
1143 if (type == DESC_TYPE_ARRAY) {
1144 /* account for the bounds */
1146 if (remove) memset (start, 0, skip_size);
1149 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1150 OBJ_BITMAP_SIZE (skip_size, vt, start);
1151 g_assert (skip_size);
1152 if (remove) memset (start, 0, skip_size);
1155 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1156 skip_size = safe_object_get_size ((MonoObject*)start);
1157 skip_size += (ALLOC_ALIGN - 1);
1158 skip_size &= ~(ALLOC_ALIGN - 1);
1159 if (remove) memset (start, 0, skip_size);
1162 } else if (type == DESC_TYPE_COMPLEX) {
1163 /* this is a complex object */
1164 skip_size = safe_object_get_size ((MonoObject*)start);
1165 skip_size += (ALLOC_ALIGN - 1);
1166 skip_size &= ~(ALLOC_ALIGN - 1);
1167 if (remove) memset (start, 0, skip_size);
1170 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1171 /* this is an array of complex structs */
1172 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1173 skip_size *= mono_array_length ((MonoArray*)start);
1174 skip_size += sizeof (MonoArray);
1175 skip_size += (ALLOC_ALIGN - 1);
1176 skip_size &= ~(ALLOC_ALIGN - 1);
1177 if (type == DESC_TYPE_ARRAY) {
1178 /* account for the bounds */
1180 if (remove) memset (start, 0, skip_size);
1190 * When appdomains are unloaded we can easily remove objects that have finalizers,
1191 * but all the others could still be present in random places on the heap.
1192 * We need a sweep to get rid of them even though it's going to be costly
1194 * The reason we need to remove them is because we access the vtable and class
1195 * structures to know the object size and the reference bitmap: once the domain is
1196 * unloaded they point to random memory.
1199 mono_gc_clear_domain (MonoDomain * domain)
1201 GCMemSection *section;
/* sweep every heap section, zeroing out objects belonging to @domain */
1203 for (section = section_list; section; section = section->next) {
1204 scan_area_for_domain (domain, section->data, section->end_data);
1206 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
/*
 * add_to_global_remset:
 * Record @ptr (the address of a slot holding a cross-generation reference)
 * in the global remembered set.  If the current buffer is full, a new remset
 * of the same capacity is allocated and chained in front of the old one.
 * NOTE(review): the line that makes `rs` the new global_remset is elided
 * here — the final store below presumably goes into the fresh buffer; confirm.
 */
1211 add_to_global_remset (gpointer ptr)
1214 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
/* fast path: room left in the current remset buffer */
1215 if (global_remset->store_next < global_remset->end_set) {
1216 *(global_remset->store_next++) = (mword)ptr;
1219 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1220 rs->next = global_remset;
1222 *(global_remset->store_next++) = (mword)ptr;
1226 * This is how the copying happens from the nursery to the old generation.
1227 * We assume that at this time all the pinned objects have been identified and
1229 We run scan_object() for each pinned object so that each referenced
1230 object is copied if possible. The new gray objects created can have
1231 * scan_object() run on them right away, too.
1232 * Then we run copy_object() for the precisely tracked roots. At this point
1233 * all the roots are either gray or black. We run scan_object() on the gray
1234 * objects until no more gray objects are created.
1235 * At the end of the process we walk again the pinned list and we unmark
1236 * the pinned flag. As we go we also create the list of free space for use
1237 * in the next allocation runs.
1239 * We need to remember objects from the old generation that point to the new one
1240 * (or just addresses?).
1242 * copy_object could be made into a macro once debugged (use inline for now).
/*
 * copy_object:
 * Copy @obj from the from-space (nursery) to the to-space allocation frontier
 * (gray_objects) and return the new address.  Returns early (elided returns)
 * when the object is outside the from-space range, already forwarded, pinned,
 * or is a large/pinned-chunk allocation that is marked in place instead.
 */
1245 static char* __attribute__((noinline))
1246 copy_object (char *obj, char *from_space_start, char *from_space_end)
1248 if (obj >= from_space_start && obj < from_space_end) {
1252 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
/* already moved in this collection: the forwarding pointer has the new address */
1253 if ((forwarded = object_is_forwarded (obj))) {
1254 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1255 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
/* pinned objects must not move */
1258 if (object_is_pinned (obj)) {
1259 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1260 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1263 objsize = safe_object_get_size ((MonoObject*)obj);
1264 objsize += ALLOC_ALIGN - 1;
1265 objsize &= ~(ALLOC_ALIGN - 1);
1266 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %d)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1267 /* FIXME: handle pinned allocs:
1268 * Large objects are simple, at least until we always follow the rule:
1269 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1270 * At the end of major collections, we walk the los list and if
1271 * the object is pinned, it is marked, otherwise it can be freed.
1273 if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) {
1274 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %d\n", obj, safe_name (obj), objsize));
1278 /* ok, the object is not pinned, we can move it */
1279 /* use an optimized memcpy here */
/* x86 inline-assembly copy path (rep movs style, per the ecx/edi/esi operands);
 * the asm template line itself is elided in this listing */
1284 char* edi = gray_objects;
1285 __asm__ __volatile__(
1287 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1288 : "0" (objsize/4), "1" (edi),"2" (esi)
1293 memcpy (gray_objects, obj, objsize);
1295 /* adjust array->bounds */
1296 vt = ((MonoObject*)obj)->vtable;
1297 g_assert (vt->gc_descr);
/* bounds is an interior pointer for multi-dim arrays: rebase it into the copy */
1298 if (vt->rank && ((MonoArray*)obj)->bounds) {
1299 MonoArray *array = (MonoArray*)gray_objects;
1300 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1301 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %d, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1303 /* set the forwarding pointer */
1304 forward_object (obj, gray_objects);
1306 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
/* bump the to-space allocation frontier past the copy */
1307 gray_objects += objsize;
1308 DEBUG (8, g_assert (gray_objects <= to_space_end));
/* copying variant of HANDLE_PTR used by scan_object: replace the slot with
 * the copied/forwarded address; if the target is still inside the from-space
 * range after the call (e.g. it could not be moved), remember the slot in the
 * global remset so it is revisited — TODO(review) confirm rationale. */
1315 #define HANDLE_PTR(ptr,obj) do { \
1317 void *__old = *(ptr); \
1318 *(ptr) = copy_object (*(ptr), from_start, from_end); \
1319 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1320 if (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) \
1321 add_to_global_remset ((ptr)); \
/*
 * scan_object:
 * Dispatch on the descriptor type tag of the object at @start, run the
 * matching FOREACH_PTR macro (which uses the copying HANDLE_PTR above) on its
 * reference slots, and return the address just past the object.  The
 * commented-out if/else chain shows the pre-switch form of the same dispatch.
 */
1326 * Scan the object pointed to by @start for references to
1327 * other objects between @from_start and @from_end and copy
1328 * them to the gray_objects area.
1329 * Returns a pointer to the end of the object.
1332 scan_object (char *start, char* from_start, char* from_end)
1337 vt = (GCVTable*)LOAD_VTABLE (start);
1338 //type = vt->desc & 0x7;
1340 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1341 switch (vt->desc & 0x7) {
1342 //if (type == DESC_TYPE_STRING) {
1343 case DESC_TYPE_STRING:
1344 STRING_SIZE (skip_size, start);
1345 return start + skip_size;
1346 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1347 case DESC_TYPE_RUN_LENGTH:
1348 OBJ_RUN_LEN_FOREACH_PTR (vt,start);
1349 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1350 g_assert (skip_size);
1351 return start + skip_size;
1352 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1353 case DESC_TYPE_ARRAY:
1354 case DESC_TYPE_VECTOR:
1355 OBJ_VECTOR_FOREACH_PTR (vt, start);
1356 skip_size = safe_object_get_size ((MonoObject*)start);
1358 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1359 skip_size *= mono_array_length ((MonoArray*)start);
1360 skip_size += sizeof (MonoArray);
1362 skip_size += (ALLOC_ALIGN - 1);
1363 skip_size &= ~(ALLOC_ALIGN - 1);
1364 return start + skip_size;
1365 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1366 case DESC_TYPE_SMALL_BITMAP:
1367 OBJ_BITMAP_FOREACH_PTR (vt,start);
1368 OBJ_BITMAP_SIZE (skip_size, vt, start);
1369 return start + skip_size;
1370 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1371 case DESC_TYPE_LARGE_BITMAP:
1372 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1373 skip_size = safe_object_get_size ((MonoObject*)start);
1374 skip_size += (ALLOC_ALIGN - 1);
1375 skip_size &= ~(ALLOC_ALIGN - 1);
1376 return start + skip_size;
1377 //} else if (type == DESC_TYPE_COMPLEX) {
1378 case DESC_TYPE_COMPLEX:
1379 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1380 /* this is a complex object */
1381 skip_size = safe_object_get_size ((MonoObject*)start);
1382 skip_size += (ALLOC_ALIGN - 1);
1383 skip_size &= ~(ALLOC_ALIGN - 1);
1384 return start + skip_size;
1385 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1386 case DESC_TYPE_COMPLEX_ARR:
1387 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1388 /* this is an array of complex structs */
1389 skip_size = safe_object_get_size ((MonoObject*)start);
1391 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1392 skip_size *= mono_array_length ((MonoArray*)start);
1393 skip_size += sizeof (MonoArray);
1395 skip_size += (ALLOC_ALIGN - 1);
1396 skip_size &= ~(ALLOC_ALIGN - 1);
1397 return start + skip_size;
/* unknown descriptor tag */
1399 g_assert_not_reached ();
/*
 * pin_objects_from_addresses:
 * For each (sorted) candidate address in [start, end) that falls inside the
 * nursery, locate the containing object by walking forward from the nearest
 * scan_start entry and pin it.  Pinned objects are compacted into the front
 * of the queue (definitely_pinned); the count is returned (return elided).
 */
1404 * Addresses from start to end are already sorted. This function finds the object header
1405 * for each address and pins the object. The addresses must be inside the passed section.
1406 * Return the number of pinned objects.
1409 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1414 void *last_obj = NULL;
1415 size_t last_obj_size = 0;
1418 void **definitely_pinned = start;
1419 while (start < end) {
1421 /* the range check should be redundant */
1422 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1423 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1424 /* multiple pointers to the same object */
1425 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
/* find the closest recorded scan start at or before addr */
1429 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1430 search_start = (void*)section->scan_starts [idx];
1431 if (!search_start || search_start > addr) {
1434 search_start = section->scan_starts [idx];
1435 if (search_start && search_start <= addr)
1438 if (!search_start || search_start > addr)
1439 search_start = start_nursery;
1441 if (search_start < last_obj)
1442 search_start = (char*)last_obj + last_obj_size;
1443 /* now addr should be in an object a short distance from search_start
1444 * Note that search_start must point to zeroed mem or point to an object.
/* skip zeroed holes one aligned word at a time */
1447 if (!*(void**)search_start) {
1448 mword p = (mword)search_start;
1449 p += sizeof (gpointer);
1450 p += ALLOC_ALIGN - 1;
1451 p &= ~(ALLOC_ALIGN - 1);
1452 search_start = (void*)p;
1455 last_obj = search_start;
1456 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1457 last_obj_size += ALLOC_ALIGN - 1;
1458 last_obj_size &= ~(ALLOC_ALIGN - 1);
1459 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %d\n", last_obj, safe_name (last_obj), last_obj_size));
1460 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1461 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1462 pin_object (search_start);
1463 definitely_pinned [count] = search_start;
1467 /* skip to the next object */
1468 search_start = (void*)((char*)search_start + last_obj_size);
1469 } while (search_start <= addr);
1470 /* we either pinned the correct object or we ignored the addr because
1471 * it points to unused zeroed memory.
1477 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
/* dynamically grown queue of candidate pinned addresses (see realloc_pin_queue) */
1481 static void** pin_queue;
1482 static int pin_queue_size = 0;
1483 static int next_pin_slot = 0;
/* gap shrink for the sort below — matches comb sort's 1.3 factor with the
 * 9/10 -> 11 special case ("rule of 11"); header line of new_gap is elided */
1488 gap = (gap * 10) / 13;
1489 if (gap == 9 || gap == 10)
/* qsort-compatible comparator, kept for reference (see comment in sort_addresses) */
1498 compare_addr (const void *a, const void *b)
1500 return *(const void **)a - *(const void **)b;
1504 /* sort the addresses in array in increasing order */
1506 sort_addresses (void **array, int size)
1509 * qsort is slower as predicted.
1510 * qsort (array, size, sizeof (gpointer), compare_addr);
/* shrinking-gap compare/swap passes; stop once a gap-1 pass makes no swaps */
1517 gap = new_gap (gap);
1520 for (i = 0; i < end; i++) {
1522 if (array [i] > array [j]) {
1523 void* val = array [i];
1524 array [i] = array [j];
1529 if (gap == 1 && !swapped)
/* debug helper: print the address ranges between consecutive entries of the
 * (sorted) pin queue, i.e. the free gaps in the nursery */
1535 print_nursery_gaps (void* start_nursery, void *end_nursery)
1538 gpointer first = start_nursery;
1540 for (i = 0; i < next_pin_slot; ++i) {
1541 next = pin_queue [i];
1542 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, (char*)next-(char*)first);
/* final gap up to the end of the nursery */
1546 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, (char*)next-(char*)first);
1549 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
/* optimize_pin_queue: sort pin_queue[start_slot..next_pin_slot) and compact
 * away duplicate addresses in place, shrinking next_pin_slot accordingly */
1551 optimize_pin_queue (int start_slot)
1553 void **start, **cur, **end;
1554 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1555 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1556 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1557 if ((next_pin_slot - start_slot) > 1)
1558 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1559 start = cur = pin_queue + start_slot;
1560 end = pin_queue + next_pin_slot;
/* skip over runs of equal values, keeping one copy of each */
1563 while (*start == *cur && cur < end)
1567 next_pin_slot = start - pin_queue;
1568 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1569 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
/* grow the pin queue by 1.5x (1024 entries initially), copying the live
 * prefix and releasing the old buffer */
1574 realloc_pin_queue (void)
1576 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1577 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
1578 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1579 free_internal_mem (pin_queue);
1580 pin_queue = new_pin;
1581 pin_queue_size = new_size;
1582 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
/*
 * conservatively_pin_objects_from:
 * Scan [start, end) word by word; any value that looks like a pointer into
 * [start_nursery, end_nursery) is aligned down to ALLOC_ALIGN and appended to
 * pin_queue (grown on demand) for later sorting/uniquing.
 */
1586 * Scan the memory between start and end and queue values which could be pointers
1587 * to the area between start_nursery and end_nursery for later consideration.
1588 * Typically used for thread stacks.
1591 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1594 while (start < end) {
1595 if (*start >= start_nursery && *start < end_nursery) {
1597 * *start can point to the middle of an object
1598 * note: should we handle pointing at the end of an object?
1599 * pinning in C# code disallows pointing at the end of an object
1600 * but there is some small chance that an optimizing C compiler
1601 * may keep the only reference to an object by pointing
1602 * at the end of it. We ignore this small chance for now.
1603 * Pointers to the end of an object are indistinguishable
1604 * from pointers to the start of the next object in memory
1605 * so if we allow that we'd need to pin two objects...
1606 * We queue the pointer in an array, the
1607 * array will then be sorted and uniqued. This way
1608 * we can coalesce several pinning pointers and it should
1609 * be faster since we'd do a memory scan with increasing
1610 * addresses. Note: we can align the address to the allocation
1611 * alignment, so the unique process is more effective.
1613 mword addr = (mword)*start;
1614 addr &= ~(ALLOC_ALIGN - 1);
1615 if (next_pin_slot >= pin_queue_size)
1616 realloc_pin_queue ();
1617 pin_queue [next_pin_slot++] = (void*)addr;
1618 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1623 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1627 * If generation is 0, just mark objects in the nursery, the others we don't care,
1628 * since they are not going to move anyway.
1629 * There are different areas that are scanned for pinned pointers:
1630 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1631 * *) the pinned handle table
1632 * *) the pinned roots
1634 * Note: when we'll use a write barrier for old to new gen references, we need to
1635 * keep track of old gen objects that point to pinned new gen objects because in that
1636 * case the referenced object will be moved maybe at the next collection, but there
1637 * is no write in the old generation area where the pinned object is referenced
1638 * and we may not consider it as reachable.
/* body not visible in this listing — only the signature survives the elision */
1641 mark_pinned_objects (int generation)
1646 * Debugging function: find in the conservative roots where @obj is being pinned.
/* walks every conservatively-scanned root record (root_desc == NULL) looking
 * for a slot whose value lands inside [obj, obj+size), then also checks
 * thread stacks/registers via find_pinning_ref_from_thread */
1649 find_pinning_reference (char *obj, size_t size)
1653 char *endobj = obj + size;
1654 for (i = 0; i < roots_hash_size; ++i) {
1655 for (root = roots_hash [i]; root; root = root->next) {
1656 /* if desc is non-null it has precise info */
1657 if (!root->root_desc) {
1658 char ** start = (char**)root->start_root;
1659 while (start < (char**)root->end_root) {
1660 if (*start >= obj && *start < endobj) {
1661 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1668 find_pinning_ref_from_thread (obj, size);
/*
 * pin_from_roots:
 * Conservatively scan every pinning source — registered roots without a
 * precise descriptor, then the thread stacks — queueing candidate nursery
 * pointers via conservatively_pin_objects_from / pin_thread_data.
 */
1672 * The first thing we do in a collection is to identify pinned objects.
1673 * This function considers all the areas of memory that need to be
1674 * conservatively scanned.
1677 pin_from_roots (void *start_nursery, void *end_nursery)
1681 DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries));
1682 /* objects pinned from the API are inside these roots */
1683 for (i = 0; i < roots_hash_size; ++i) {
1684 for (root = roots_hash [i]; root; root = root->next) {
1685 /* if desc is non-null it has precise info */
1686 if (root->root_desc)
1688 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1689 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1692 /* now deal with the thread stacks
1693 * in the future we should be able to conservatively scan only:
1694 * *) the cpu registers
1695 * *) the unmanaged stack frames
1696 * *) the _last_ managed stack frame
1697 * *) pointers slots in managed frames
1699 pin_thread_data (start_nursery, end_nursery);
1703 * The memory area from start_root to end_root contains pointers to objects.
1704 * Their position is precisely described by @desc (this means that the pointer
1705 * can be either NULL or the pointer to the start of an object).
1706 * This function copies them to to_space and updates them.
1709 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1711 switch (desc & ROOT_DESC_TYPE_MASK) {
/* bitmap root descriptor: one bit per slot, copying each non-NULL root */
1712 case ROOT_DESC_BITMAP:
1713 desc >>= ROOT_DESC_TYPE_SHIFT;
1715 if ((desc & 1) && *start_root) {
1716 *start_root = copy_object (*start_root, n_start, n_end);
1717 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \
/* remaining descriptor kinds are not implemented yet */
1723 case ROOT_DESC_RUN_LEN:
1724 case ROOT_DESC_LARGE_BITMAP:
1725 g_assert_not_reached ();
/* pop a Fragment from the freelist, or allocate a fresh one from internal
 * memory when the freelist is empty (the return is elided in this listing) */
1730 alloc_fragment (void)
1732 Fragment *frag = fragment_freelist;
1734 fragment_freelist = frag->next;
1738 frag = get_internal_mem (sizeof (Fragment));
/*
 * alloc_nursery:
 * One-time setup of the nursery: allocate the OS memory, initialize the
 * nursery_* allocation pointers, build the GCMemSection (with its scan_starts
 * index), link it into section_list, and seed a single fragment covering the
 * whole nursery.  Returns early (elided) when nursery_section already exists.
 */
1744 * Allocate and setup the data structures needed to be able to allocate objects
1745 * in the nursery. The nursery is stored in nursery_section.
1748 alloc_nursery (void)
1750 GCMemSection *section;
1755 if (nursery_section)
1757 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %d\n", nursery_size));
1758 /* later we will alloc a larger area for the nursery but only activate
1759 * what we need. The rest will be used as expansion if we have too many pinned
1760 * objects in the existing nursery.
1762 /* FIXME: handle OOM */
1763 section = get_internal_mem (sizeof (GCMemSection));
1764 data = get_os_memory (nursery_size, TRUE);
1765 nursery_start = nursery_next = data;
1766 nursery_real_end = data + nursery_size;
/* first bump-allocation window is one scan-start chunk */
1767 nursery_temp_end = data + SCAN_START_SIZE;
1768 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
1769 total_alloc += nursery_size;
1770 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", nursery_size, total_alloc));
1771 section->data = section->next_data = data;
1772 section->size = nursery_size;
1773 section->end_data = nursery_real_end;
1774 scan_starts = nursery_size / SCAN_START_SIZE;
1775 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
1776 section->num_scan_start = scan_starts;
1778 /* add to the section list */
1779 section->next = section_list;
1780 section_list = section;
1782 nursery_section = section;
1784 /* Setup the single first large fragment */
1785 frag = alloc_fragment ();
1786 frag->fragment_start = nursery_start;
1787 frag->fragment_limit = nursery_start;
1788 frag->fragment_end = nursery_real_end;
1789 nursery_frag_real_end = nursery_real_end;
1790 /* FIXME: frag here is lost */
/*
 * scan_old_generation:
 * Scan all non-nursery sections, the large-object list, and the
 * ready-to-finalize list, copying/updating any references into
 * [start, end) (the nursery range being evacuated).
 */
1794 * Update roots in the old generation. Since we currently don't have the
1795 * info from the write barriers, we just scan all the objects.
1798 scan_old_generation (char *start, char* end)
1800 GCMemSection *section;
1802 LOSObject *big_object;
1805 for (section = section_list; section; section = section->next) {
1806 if (section == nursery_section)
1808 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
1809 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
1811 while (p < section->next_data) {
1816 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
1817 p = scan_object (p, start, end);
1820 /* scan the old object space, too */
1821 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1822 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %d\n", big_object->data, safe_name (big_object->data), big_object->size));
1823 scan_object (big_object->data, start, end);
1825 /* scan the list of objects ready for finalization */
1826 for (fin = fin_ready_list; fin; fin = fin->next) {
1827 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
1828 fin->object = copy_object (fin->object, start, end);
/* running total of bytes made available as nursery fragments in the current rebuild */
1832 static mword fragment_total = 0;
1834 * We found a fragment of free memory in the nursery: memzero it and if
1835 * it is big enough, add it to the list of fragments that can be used for
/*
 * Zero the [frag_start, frag_end) hole so later bump-pointer allocation
 * hands out pre-zeroed memory, then queue it on nursery_fragments when it
 * is at least FRAGMENT_MIN_SIZE; smaller holes are zeroed but discarded.
 */
1839 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
1842 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %d\n", frag_start, frag_end, frag_size));
1843 /* memsetting just the first chunk start is bound to provide better cache locality */
1844 memset (frag_start, 0, frag_size);
1845 /* Not worth dealing with smaller fragments: need to tune */
1846 if (frag_size >= FRAGMENT_MIN_SIZE) {
1847 fragment = alloc_fragment ();
1848 fragment->fragment_start = frag_start;
/* limit == start: nothing allocated from this fragment yet */
1849 fragment->fragment_limit = frag_start;
1850 fragment->fragment_end = frag_end;
1851 fragment->next = nursery_fragments;
1852 nursery_fragments = fragment;
1853 fragment_total += frag_size;
/*
 * Iteratively process the gray area [to_space, gray_objects): objects copied
 * so far are scanned, which may copy more objects and grow gray_objects, until
 * the two pointers meet (Cheney-style, no recursion). Then finalizer-ready
 * objects are copied as extra roots (looping because one fin-ready object can
 * make another fin-ready), and finally disappearing links are nulled.
 * NOTE(review): intervening source lines are elided in this excerpt.
 */
1858 drain_gray_stack (char *start_addr, char *end_addr)
1866 * We copied all the reachable objects. Now it's the time to copy
1867 * the objects that were not referenced by the roots, but by the copied objects.
1868 * we built a stack of objects pointed to by gray_start: they are
1869 * additional roots and we may add more items as we go.
1870 * We loop until gray_start == gray_objects which means no more objects have
1871 * been added. Note this is iterative: no recursion is involved.
1872 * We need to walk the LO list as well in search of marked big objects
1873 * (use a flag since this is needed only on major collections). We need to loop
1874 * here as well, so keep a counter of marked LO (increasing it in copy_object).
1877 gray_start = to_space;
1878 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
/* scanning an object can copy more objects, advancing gray_objects */
1879 while (gray_start < gray_objects) {
1880 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1881 gray_start = scan_object (gray_start, start_addr, end_addr);
1884 DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv)));
1885 //scan_old_generation (start_addr, end_addr);
1886 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
1887 /* walk the finalization queue and move also the objects that need to be
1888 * finalized: use the finalized objects as new roots so the objects they depend
1889 * on are also not reclaimed. As with the roots above, only objects in the nursery
1890 * are marked/copied.
1891 * We need a loop here, since objects ready for finalizers may reference other objects
1892 * that are fin-ready. Speedup with a flag?
/* repeat until finalize_in_range () stops adding new fin-ready objects */
1895 fin_ready = num_ready_finalizers;
1896 finalize_in_range ((void**)start_addr, (void**)end_addr);
1898 /* drain the new stack that might have been created */
1899 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
1900 while (gray_start < gray_objects) {
1901 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1902 gray_start = scan_object (gray_start, start_addr, end_addr);
1904 } while (fin_ready != num_ready_finalizers);
1906 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
/* record where copying stopped so the next collection appends after it */
1907 to_space = gray_start;
1908 to_space_section->next_data = to_space;
1911 * handle disappearing links
1912 * Note we do this after checking the finalization queue because if an object
1913 * survives (at least long enough to be finalized) we don't clear the link.
1914 * This also deals with a possible issue with the monitor reclamation: with the Boehm
1915 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
1918 null_link_in_range ((void**)start_addr, (void**)end_addr);
1920 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/* number of pinned objects found by the previous collection (for diagnostics) */
1923 static int last_num_pinned = 0;
/*
 * Walk pin_queue [start_pin..end_pin): each pinned object is unpinned, recorded
 * as a scan start, and the free gap before it becomes a nursery fragment via
 * add_nursery_frag (). The tail gap up to nursery_real_end is added last.
 * If no fragment survives, the nursery is fully pinned and the offenders are
 * logged. NOTE(review): some source lines are elided in this excerpt.
 */
1926 build_nursery_fragments (int start_pin, int end_pin)
1928 char *frag_start, *frag_end;
1932 /* FIXME: handle non-NULL fragment_freelist */
/* recycle the old fragment list before rebuilding it from scratch */
1933 fragment_freelist = nursery_fragments;
1934 nursery_fragments = NULL;
1935 frag_start = nursery_start;
1937 /* clear scan starts */
1938 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
1939 for (i = start_pin; i < end_pin; ++i) {
1940 frag_end = pin_queue [i];
1941 /* remove the pin bit from pinned objects */
1942 unpin_object (frag_end);
/* a pinned object is a known object start: register it for conservative scans */
1943 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
1944 frag_size = frag_end - frag_start;
1946 add_nursery_frag (frag_size, frag_start, frag_end);
/* skip past the pinned object, rounding its size up to ALLOC_ALIGN */
1947 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
1948 frag_size += ALLOC_ALIGN - 1;
1949 frag_size &= ~(ALLOC_ALIGN - 1);
1950 frag_start = (char*)pin_queue [i] + frag_size;
1952 nursery_last_pinned_end = frag_start;
1953 frag_end = nursery_real_end;
1954 frag_size = frag_end - frag_start;
1956 add_nursery_frag (frag_size, frag_start, frag_end);
1957 if (!nursery_fragments) {
1958 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
1959 for (i = start_pin; i < end_pin; ++i) {
1960 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
1966 /* FIXME: later reduce code duplication here with the above
1967 * We don't keep track of section fragments for non-nursery sections yet, so
/*
 * Major-heap analogue of build_nursery_fragments (): walk the section's slice
 * of the pin queue, unpin survivors, record scan starts, and memset () the
 * gaps between pinned objects to zero. Unlike the nursery, the holes are not
 * queued for reuse (no fragment list for old sections yet, per FIXME above).
 * NOTE(review): some source lines are elided in this excerpt.
 */
1971 build_section_fragments (GCMemSection *section)
1974 char *frag_start, *frag_end;
1977 /* clear scan starts */
1978 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
1979 frag_start = section->data;
1980 section->next_data = section->data;
1981 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
1982 frag_end = pin_queue [i];
1983 /* remove the pin bit from pinned objects */
1984 unpin_object (frag_end);
/* clamp addresses that fall past the section's end */
1985 if (frag_end >= section->data + section->size) {
1986 frag_end = section->data + section->size;
1988 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
1990 frag_size = frag_end - frag_start;
/* zero the hole so conservative scanning sees no stale data */
1992 memset (frag_start, 0, frag_size);
/* step over the pinned object, size rounded up to ALLOC_ALIGN */
1993 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
1994 frag_size += ALLOC_ALIGN - 1;
1995 frag_size &= ~(ALLOC_ALIGN - 1);
1996 frag_start = (char*)pin_queue [i] + frag_size;
1997 section->next_data = MAX (section->next_data, frag_start);
1999 frag_end = section->end_data;
2000 frag_size = frag_end - frag_start;
2002 memset (frag_start, 0, frag_size);
2006 * Collect objects in the nursery.
/*
 * Minor (nursery) collection: ensure the to-space has room for the worst case
 * (the whole live nursery), pin conservatively-referenced objects, copy
 * survivors out of the nursery starting from roots/remsets, drain the gray
 * area, then rebuild the nursery free-fragment list from the pin queue.
 * The world must already be stopped by the caller.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2009 collect_nursery (size_t requested_size)
2011 GCMemSection *section;
2012 size_t max_garbage_amount;
2019 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2020 /* FIXME: optimize later to use the higher address where an object can be present */
2021 nursery_next = MAX (nursery_next, nursery_real_end);
2023 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
/* worst case: everything in the nursery survives and must be copied */
2024 max_garbage_amount = nursery_next - nursery_start;
2026 * not enough room in the old generation to store all the possible data from
2027 * the nursery in a single continuous space.
2028 * We reset to_space if we allocated objects in degraded mode.
2030 if (to_space_section)
2031 to_space = gray_objects = to_space_section->next_data;
/* grow the to-space if the current one cannot absorb the whole nursery */
2032 if ((to_space_end - to_space) < max_garbage_amount) {
2033 section = alloc_section (nursery_section->size * 4);
2034 g_assert (nursery_section->size >= max_garbage_amount);
2035 to_space = gray_objects = section->next_data;
2036 to_space_end = section->end_data;
2037 to_space_section = section;
2039 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2040 nursery_section->next_data = nursery_next;
2043 /* world must be stopped already */
2045 /* pin from pinned handles */
2046 pin_from_roots (nursery_start, nursery_next);
2047 /* identify pinned objects */
2048 optimize_pin_queue (0);
2049 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2051 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2052 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2055 * walk all the roots and copy the young objects to the old generation,
2056 * starting from to_space
2059 scan_from_remsets (nursery_start, nursery_next);
2060 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2062 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2063 /* FIXME: later scan also alloc_pinned objects */
2065 /* the pinned objects are roots */
2066 for (i = 0; i < next_pin_slot; ++i) {
2067 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2068 scan_object (pin_queue [i], nursery_start, nursery_next);
2070 /* registered roots, this includes static fields */
2071 for (i = 0; i < roots_hash_size; ++i) {
2072 for (root = roots_hash [i]; root; root = root->next) {
2073 /* if desc is non-null it has precise info */
2074 if (!root->root_desc)
2076 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2077 precisely_scan_objects_from ((void**)root->start_root, root->end_root, nursery_start, nursery_next, root->root_desc);
2081 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/* process everything reachable from the objects copied so far */
2083 drain_gray_stack (nursery_start, nursery_next);
2085 /* walk the pin_queue, build up the fragment list of free memory, unmark
2086 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2089 build_nursery_fragments (0, next_pin_slot);
2091 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %d bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2093 /* prepare the pin queue for the next collection */
2094 last_num_pinned = next_pin_slot;
/* wake the finalizer thread if this collection made objects fin-ready */
2096 if (fin_ready_list) {
2097 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2098 mono_gc_finalize_notify ();
/*
 * Major collection (stop-the-world, copying): pin conservatively-referenced
 * objects in every section, large object, and pinned chunk; allocate one big
 * to-space sized to hold every section's live data; copy survivors from the
 * pinned objects, precise roots, and fin-ready objects; then sweep large
 * objects and pinned chunks, free sections with no pinned objects, and
 * rebuild fragments. Remsets are not used here (the whole heap is scanned).
 * NOTE(review): some source lines are elided in this excerpt.
 */
2103 major_collection (void)
2105 GCMemSection *section, *prev_section;
2106 LOSObject *bigobj, *prevbo;
2114 /* FIXME: only use these values for the precise scan
2115 * note that to_space pointers should be excluded anyway...
2117 char *heap_start = NULL;
2118 char *heap_end = (char*)-1;
2119 size_t copy_space_required = 0;
2122 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2125 * FIXME: implement Mark/Compact
2126 * Until that is done, we can just apply mostly the same alg as for the nursery:
2127 * this means we need a big section to potentially copy all the other sections, so
2128 * it is not ideal specially with large heaps.
/* escape hatch: degrade major collections to minor ones via env var */
2130 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2131 collect_nursery (0);
2134 /* FIXME: make sure the nursery next_data ptr is updated */
2135 nursery_section->next_data = nursery_real_end;
2136 /* we should also coalesce scanning from sections close to each other
2137 * and deal with pointers outside of the sections later.
2139 /* The remsets are not useful for a major collection */
2141 /* world must be stopped already */
2143 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
/* each section remembers its own slice [pin_queue_start, pin_queue_end) of the pin queue */
2144 for (section = section_list; section; section = section->next) {
2145 section->pin_queue_start = count = section->pin_queue_end = next_pin_slot;
2146 pin_from_roots (section->data, section->next_data);
2147 if (count != next_pin_slot) {
2149 optimize_pin_queue (count);
2150 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2151 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2152 section->pin_queue_end = next_pin_slot = count + reduced_to;
/* worst case the to-space must hold every section's live data */
2154 copy_space_required += (char*)section->next_data - (char*)section->data;
2156 /* identify possible pointers to the inside of large objects */
2157 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2158 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2159 count = next_pin_slot;
2160 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2161 /* FIXME: this is only valid until we don't optimize the pin queue midway */
/* drop the addresses again: marking the LOS object itself is enough */
2162 if (next_pin_slot != count) {
2163 next_pin_slot = count;
2164 pin_object (bigobj->data);
2165 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %d from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2168 /* look for pinned addresses for pinned-alloc objects */
2169 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2170 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2171 count = next_pin_slot;
2172 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2173 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2174 if (next_pin_slot != count) {
2175 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2176 next_pin_slot = count;
2181 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2182 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2184 /* allocate the big to space */
2185 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %d\n", copy_space_required));
2186 section = alloc_section (copy_space_required);
2187 to_space = gray_objects = section->next_data;
2188 to_space_end = section->end_data;
2189 to_space_section = section;
2191 /* the old generation doesn't need to be scanned (no remembered sets or card
2192 * table needed either): the only objects that must survive are those pinned and
2193 * those referenced by the precise roots.
2194 * mark any section without pinned objects, so we can free it since we will be able to
2195 * move all the objects.
2197 /* the pinned objects are roots */
2198 for (i = 0; i < next_pin_slot; ++i) {
2199 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2200 scan_object (pin_queue [i], heap_start, heap_end);
2202 /* registered roots, this includes static fields */
2203 for (i = 0; i < roots_hash_size; ++i) {
2204 for (root = roots_hash [i]; root; root = root->next) {
2205 /* if desc is non-null it has precise info */
2206 if (!root->root_desc)
2208 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2209 precisely_scan_objects_from ((void**)root->start_root, root->end_root, heap_start, heap_end, root->root_desc);
2212 /* scan the list of objects ready for finalization */
2213 for (fin = fin_ready_list; fin; fin = fin->next) {
2214 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2215 fin->object = copy_object (fin->object, heap_start, heap_end);
2218 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2220 /* all the objects in the heap */
2221 drain_gray_stack (heap_start, heap_end);
2223 /* sweep the big objects list */
/* unpinned LOS objects are unreachable; unlink and free them */
2225 for (bigobj = los_object_list; bigobj;) {
2226 if (object_is_pinned (bigobj->data)) {
2227 unpin_object (bigobj->data);
2230 /* not referenced anywhere, so we can free it */
2232 prevbo->next = bigobj->next;
2234 los_object_list = bigobj->next;
2236 bigobj = bigobj->next;
2237 free_large_object (to_free);
2241 bigobj = bigobj->next;
2243 /* unpin objects from the pinned chunks and free the unmarked ones */
2244 sweep_pinned_objects ();
2246 /* free the unused sections */
2247 prev_section = NULL;
2248 for (section = section_list; section;) {
2249 /* to_space doesn't need handling here and the nursery is special */
2250 if (section == to_space_section || section == nursery_section) {
2251 prev_section = section;
2252 section = section->next;
2255 /* no pinning object, so the section is free */
2256 if (section->pin_queue_start == section->pin_queue_end) {
2257 GCMemSection *to_free;
2259 prev_section->next = section->next;
2261 section_list = section->next;
2263 section = section->next;
2264 free_mem_section (to_free);
2267 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2268 build_section_fragments (section);
2270 prev_section = section;
2271 section = section->next;
2274 /* walk the pin_queue, build up the fragment list of free memory, unmark
2275 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2278 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2280 /* prepare the pin queue for the next collection */
/* wake the finalizer thread if this collection made objects fin-ready */
2282 if (fin_ready_list) {
2283 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2284 mono_gc_finalize_notify ();
2289 * Allocate a new section of memory to be used as old generation.
/*
 * Returns a fresh GCMemSection of at least @size bytes (page-rounded).
 * The default section size doubles every few allocations up to
 * max_section_size, so a growing heap asks the OS for memory less often.
 * The new section is pushed onto section_list.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2291 static GCMemSection*
2292 alloc_section (size_t size)
2294 GCMemSection *section;
2297 size_t new_size = next_section_size;
2299 if (size > next_section_size) {
/* round the request up to a whole number of OS pages */
2301 new_size += pagesize - 1;
2302 new_size &= ~(pagesize - 1);
/* after a few sections at this size, double it (capped at max_section_size) */
2304 section_size_used++;
2305 if (section_size_used > 3) {
2306 section_size_used = 0;
2307 next_section_size *= 2;
2308 if (next_section_size > max_section_size)
2309 next_section_size = max_section_size;
2311 section = get_internal_mem (sizeof (GCMemSection));
2312 data = get_os_memory (new_size, TRUE);
2313 section->data = section->next_data = data;
2314 section->size = new_size;
2315 section->end_data = data + new_size;
2316 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2317 total_alloc += new_size;
2318 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", new_size, total_alloc));
2319 section->data = data;
2320 section->size = new_size;
/* one scan-start slot per SCAN_START_SIZE bytes of section data */
2321 scan_starts = new_size / SCAN_START_SIZE;
2322 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2323 section->num_scan_start = scan_starts;
2325 /* add to the section list */
2326 section->next = section_list;
2327 section_list = section;
/*
 * Release a section's data back to the OS and free its metadata,
 * keeping the global total_alloc accounting in sync.
 * The caller must already have unlinked it from section_list.
 */
2333 free_mem_section (GCMemSection *section)
2335 char *data = section->data;
2336 size_t size = section->size;
2337 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %d\n", data, size));
2338 free_os_memory (data, size);
2339 free_internal_mem (section);
2340 total_alloc -= size;
2344 * When deciding if it's better to collect or to expand, keep track
2345 * of how much garbage was reclaimed with the last collection: if it's too
2347 * This is called when we could not allocate a small object.
/*
 * Slow path for a failed small-object allocation: run a nursery collection
 * (unless the nursery isn't set up yet) and re-search the fragment list for
 * @size bytes; if still nothing fits, log the pinned objects blocking reuse.
 * noinline keeps this cold path out of the hot allocation fast path.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2349 static void __attribute__((noinline))
2350 minor_collect_or_expand_inner (size_t size)
2352 int do_minor_collection = 1;
2354 if (!nursery_section) {
2358 if (do_minor_collection) {
2360 collect_nursery (size);
2361 DEBUG (2, fprintf (gc_debug_file, "Heap size: %d, LOS size: %d\n", total_alloc, los_memory_usage));
2363 /* this also sets the proper pointers for the next allocation */
2364 if (!search_fragment_for_size (size)) {
2366 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2367 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %d alloc (%d pinned)", size, last_num_pinned))
;
2368 for (i = 0; i < last_num_pinned; ++i) {
2369 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2374 //report_internal_mem_usage ();
2378 * ######################################################################
2379 * ######## Memory allocation from the OS
2380 * ######################################################################
2381 * This section of code deals with getting memory from the OS and
2382 * allocating memory for GC-internal data structures.
2383 * Internal memory can be handled with a freelist for small objects.
2386 #ifndef MAP_ANONYMOUS
2387 #define MAP_ANONYMOUS MAP_ANON
2391 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2392 * This must not require any lock.
/*
 * @activate selects read+write protection; otherwise the pages are reserved
 * with no access. The size is rounded up to whole OS pages before mmap'ing
 * via mono_valloc ().
 */
2395 get_os_memory (size_t size, int activate)
2398 unsigned long prot_flags = activate? MONO_MMAP_READ|MONO_MMAP_WRITE: MONO_MMAP_NONE;
2400 prot_flags |= MONO_MMAP_PRIVATE | MONO_MMAP_ANON;
2401 size += pagesize - 1;
2402 size &= ~(pagesize - 1);
2403 ptr = mono_valloc (0, size, prot_flags);
2408 * Free the memory returned by get_os_memory (), returning it to the OS.
/* @size must be the same (page-rounded) size the region was mapped with */
2411 free_os_memory (void *addr, size_t size)
2413 munmap (addr, size);
/*
 * Debug helper: print one pinned chunk's page count, free pages, and
 * per-slot free-list lengths, accumulating the total free memory.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2420 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2422 int i, free_pages, num_free, free_mem;
/* a page_sizes [] entry of 0 means the page was never handed to a freelist */
2424 for (i = 0; i < chunk->num_pages; ++i) {
2425 if (!chunk->page_sizes [i])
2428 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2429 free_mem = FREELIST_PAGESIZE * free_pages;
/* count the entries on each size-class freelist */
2430 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2431 if (!chunk->free_list [i])
2434 p = chunk->free_list [i];
2439 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2440 free_mem += freelist_sizes [i] * num_free;
2442 printf ("\tfree memory in chunk: %d\n", free_mem);
/* Debug helper: dump usage stats for both the internal and the pinned chunk lists. */
2449 report_internal_mem_usage (void) {
2452 printf ("Internal memory usage:\n");
2454 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2455 report_pinned_chunk (chunk, i++);
2457 printf ("Pinned memory usage:\n");
2459 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2460 report_pinned_chunk (chunk, i++);
2465 * the array of pointers from @start to @end contains conservative
2466 * pointers to objects inside @chunk: mark each referenced object
/*
 * For each conservative address, compute which chunk page it lands on and
 * round it down to the start of that page's fixed-size slot; if the slot
 * holds a live object (its vtable word points outside the chunk, so it is
 * not a freelist link), mark it pinned.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2470 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2472 for (; start < end; start++) {
2473 char *addr = *start;
2474 int offset = (char*)addr - (char*)chunk;
2475 int page = offset / FREELIST_PAGESIZE;
/* page 0 is special: its usable data begins at chunk->start_data, after the header */
2476 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2477 int slot_size = chunk->page_sizes [page];
2479 /* the page is not allocated */
2482 /* would be faster if we restrict the sizes to power of two,
2483 * but that's a waste of memory: need to measure. it could reduce
2484 * fragmentation since there are less pages needed, if for example
2485 * someone interns strings of each size we end up with one page per
2486 * interned string (still this is just ~40 KB): with more fine-grained sizes
2487 * this increases the number of used pages.
/* round the interior pointer down to the slot boundary */
2490 obj_offset /= slot_size;
2491 obj_offset *= slot_size;
2492 addr = (char*)chunk->start_data + obj_offset;
2494 obj_offset /= slot_size;
2495 obj_offset *= slot_size;
2496 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2499 /* if the vtable is inside the chunk it's on the freelist, so skip */
2500 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2502 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
/*
 * Major-collection sweep of the pinned chunks: walk every slot of every
 * allocated page; live objects (vtable word points outside the chunk) that
 * are pinned get unpinned, unmarked ones are candidates for freeing
 * (returning them to the freelist is still a FIXME below).
 * NOTE(review): some source lines are elided in this excerpt.
 */
2508 sweep_pinned_objects (void)
2515 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2516 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2517 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (ranhe: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2518 for (i = 0; i < chunk->num_pages; ++i) {
/* obj_size == 0 means the page was never assigned to a size class */
2519 obj_size = chunk->page_sizes [i];
/* page 0 starts at start_data (after the chunk header), later pages are full */
2522 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2523 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2524 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2525 while (p + obj_size <= endp) {
2527 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2528 /* if the first word (the vtable) is outside the chunk we have an object */
2529 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2530 if (object_is_pinned (ptr)) {
2532 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2534 /* FIXME: add to freelist */
2535 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2545 * Find the slot number in the freelist for memory chunks that
2546 * can contain @size objects.
/* Linear scan of freelist_sizes []; asserts if @size exceeds the largest slot. */
2549 slot_for_size (size_t size)
2552 /* do a binary search or lookup table later. */
2553 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2554 if (freelist_sizes [slot] >= size)
2557 g_assert_not_reached ();
2562 * Build a free list for @size memory chunks from the memory area between
2563 * start_page and end_page.
/*
 * Carve [start_page, end_page) into @size-byte slots and thread them into a
 * singly linked list (the first word of each free slot points to the next),
 * installing the list head into chunk->free_list [slot], which must be empty.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2566 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
2570 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
2572 end = (void**)(end_page - size);
2573 g_assert (!chunk->free_list [slot]);
2574 chunk->free_list [slot] = p;
2575 while ((char*)p + size <= end) {
/* each free slot's first word links to the following slot */
2577 *p = (void*)((char*)p + size);
2581 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
/*
 * Allocate and initialize a new PinnedChunk big enough for @size: the chunk
 * header, per-page size table, and freelist array live at the start of the
 * mapping, followed by the object data; the first page is immediately carved
 * into a freelist of PINNED_FIRST_SLOT_SIZE slots.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2585 alloc_pinned_chunk (size_t size)
2590 size += pagesize; /* at least one page */
2591 size += pagesize - 1;
2592 size &= ~(pagesize - 1);
2593 if (size < PINNED_CHUNK_MIN_SIZE * 2)
2594 size = PINNED_CHUNK_MIN_SIZE * 2;
2595 chunk = get_os_memory (size, TRUE);
2596 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
2597 total_alloc += size;
2599 /* setup the bookkeeping fields */
2600 chunk->num_pages = size / FREELIST_PAGESIZE;
/* lay out page_sizes [], then free_list [], each ALLOC_ALIGN-aligned, inside the chunk */
2601 offset = G_STRUCT_OFFSET (PinnedChunk, data);
2602 chunk->page_sizes = (void*)((char*)chunk + offset);
2603 offset += sizeof (int) * chunk->num_pages;
2604 offset += ALLOC_ALIGN - 1;
2605 offset &= ~(ALLOC_ALIGN - 1);
2606 chunk->free_list = (void*)((char*)chunk + offset);
2607 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
2608 offset += ALLOC_ALIGN - 1;
2609 offset &= ~(ALLOC_ALIGN - 1);
2610 chunk->start_data = (void*)((char*)chunk + offset);
2612 /* allocate the first page to the freelist */
2613 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
2614 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
2615 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %d\n", chunk, size));
/* widen the global pinned-chunk address range used for quick membership tests */
2616 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
2617 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
2621 /* assumes freelist for slot is empty, so try to alloc a new page */
/*
 * Pop an entry from chunk->free_list [slot]; if the list is empty, claim the
 * first unassigned page, build a freelist of that slot's size on it, and pop
 * from the new list. Returns NULL if the chunk has no free pages left
 * (inferred from the fall-through after the page loop — confirm upstream).
 * NOTE(review): some source lines are elided in this excerpt.
 */
2623 get_chunk_freelist (PinnedChunk *chunk, int slot)
2627 p = chunk->free_list [slot];
/* the first word of a free slot is the link to the next free slot */
2629 chunk->free_list [slot] = *p;
2632 for (i = 0; i < chunk->num_pages; ++i) {
2634 if (chunk->page_sizes [i])
2636 size = freelist_sizes [slot];
2637 chunk->page_sizes [i] = size;
2638 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
2642 p = chunk->free_list [slot];
2644 chunk->free_list [slot] = *p;
/*
 * Allocate @size bytes of pinned object memory: try each existing chunk's
 * freelist for the matching size class, then ask each chunk to carve a new
 * page, and finally allocate a brand-new pinned chunk.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2651 alloc_from_freelist (size_t size)
2655 PinnedChunk *pchunk;
2656 slot = slot_for_size (size);
2657 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
2658 g_assert (size <= freelist_sizes [slot]);
/* fast path: a chunk already has a free slot of this size class */
2659 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2660 void **p = pchunk->free_list [slot];
2662 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
2663 pchunk->free_list [slot] = *p;
/* slower path: let a chunk dedicate a fresh page to this size class */
2667 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2668 res = get_chunk_freelist (pchunk, slot);
/* all chunks full: grow the pinned space with a new chunk */
2672 pchunk = alloc_pinned_chunk (size);
2673 /* FIXME: handle OOM */
2674 pchunk->next = pinned_chunk_list;
2675 pinned_chunk_list = pchunk;
2676 res = get_chunk_freelist (pchunk, slot);
2680 /* used for the GC-internal data structures */
2681 /* FIXME: add support for bigger sizes by allocating more than one page
/*
 * Allocate zeroed memory for GC bookkeeping. Currently short-circuited to
 * calloc (); the chunk-freelist implementation below it (mirroring
 * alloc_from_freelist () but on internal_chunk_list) is dead code until the
 * early return is removed.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2685 get_internal_mem (size_t size)
2687 return calloc (1, size);
2690 PinnedChunk *pchunk;
2691 slot = slot_for_size (size);
2692 g_assert (size <= freelist_sizes [slot]);
2693 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2694 void **p = pchunk->free_list [slot];
2696 pchunk->free_list [slot] = *p;
2700 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2701 res = get_chunk_freelist (pchunk, slot);
2705 pchunk = alloc_pinned_chunk (size);
2706 /* FIXME: handle OOM */
2707 pchunk->next = internal_chunk_list;
2708 internal_chunk_list = pchunk;
2709 res = get_chunk_freelist (pchunk, slot);
/*
 * Return internal memory to its owning chunk's freelist: locate the chunk
 * containing @addr, derive the slot from the page's size class, and push the
 * block back. Aborts if @addr belongs to no internal chunk.
 * NOTE(review): the chunk walk is dead code while get_internal_mem () uses
 * calloc (); a matching free () presumably happens elsewhere — confirm.
 */
2714 free_internal_mem (void *addr)
2718 PinnedChunk *pchunk;
2719 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2720 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
2721 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
2722 int offset = (char*)addr - (char*)pchunk;
2723 int page = offset / FREELIST_PAGESIZE;
2724 int slot = slot_for_size (pchunk->page_sizes [page]);
/* push the freed block on the head of the slot's freelist */
2726 *p = pchunk->free_list [slot];
2727 pchunk->free_list [slot] = p;
2731 printf ("free of %p failed\n", addr);
2732 g_assert_not_reached ();
2736 * ######################################################################
2737 * ######## Object allocation
2738 * ######################################################################
2739 * This section of code deals with allocating memory for objects.
2740 * There are several ways:
2741 * *) allocate large objects
2742 * *) allocate normal objects
2743 * *) fast lock-free allocation
2744 * *) allocation of pinned objects
/*
 * Return a large object's mapping (LOSObject header + data, page-rounded)
 * to the OS and update the LOS and total allocation accounting.
 * The caller must already have unlinked it from los_object_list.
 */
2748 free_large_object (LOSObject *obj)
2750 size_t size = obj->size;
2751 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %d\n", obj->data, obj->size));
2753 los_memory_usage -= size;
/* recompute the OS-level mapping size: header included, rounded to pages */
2754 size += sizeof (LOSObject);
2755 size += pagesize - 1;
2756 size &= ~(pagesize - 1);
2757 total_alloc -= size;
2759 free_os_memory (obj, size);
2763 * Objects with size >= 64KB are allocated in the large object space.
2764 * They are currently kept track of with a linked list.
2765 * They don't move, so there is no need to pin them during collection
2766 * and we avoid the memcpy overhead.
/*
 * Allocate a large object directly from the OS: possibly trigger a major
 * collection first when LOS usage crosses next_los_collection, then map a
 * page-rounded LOSObject (header + data), install the vtable, and link it
 * onto los_object_list. noinline keeps this off the allocation fast path.
 * NOTE(review): some source lines are elided in this excerpt.
 */
2768 static void* __attribute__((noinline))
2769 alloc_large_inner (MonoVTable *vtable, size_t size)
2774 int just_did_major_gc = FALSE;
/* LOS growth is bounded by periodic major collections */
2776 if (los_memory_usage > next_los_collection) {
2777 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %d (los already: %u, limit: %u)\n", size, los_memory_usage, next_los_collection));
2778 just_did_major_gc = TRUE;
2780 major_collection ();
2782 /* later increase based on a percent of the heap size */
2783 next_los_collection = los_memory_usage + 5*1024*1024;
2786 alloc_size += sizeof (LOSObject);
2787 alloc_size += pagesize - 1;
2788 alloc_size &= ~(pagesize - 1);
2789 /* FIXME: handle OOM */
2790 obj = get_os_memory (alloc_size, TRUE);
/* first word of the object is the vtable pointer */
2792 vtslot = (void**)obj->data;
2794 total_alloc += alloc_size;
2795 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
2796 obj->next = los_object_list;
2797 los_object_list = obj;
2798 los_memory_usage += size;
2800 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %d\n", obj->data, vtable, vtable->klass->name, size));
2804 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
2805 * an object of size @size
2806 * Return FALSE if not found (which means we need a collection)
/*
 * Find a nursery fragment large enough for @size, make it the current
 * allocation area (nursery_next/nursery_frag_real_end/nursery_temp_end) and
 * recycle its Fragment record onto the freelist.
 * Returns FALSE when no fragment fits, i.e. a collection is needed.
 */
search_fragment_for_size (size_t size)
	Fragment *frag, *prev;
	DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %d\n", nursery_frag_real_end, size));
	for (frag = nursery_fragments; frag; frag = frag->next) {
		if (size <= (frag->fragment_end - frag->fragment_start)) {
			/* remove from the list */
			prev->next = frag->next;
			nursery_fragments = frag->next;
			/* install the fragment as the current bump-pointer window */
			nursery_next = frag->fragment_start;
			nursery_frag_real_end = frag->fragment_end;
			/* temp_end is capped so we periodically record scan starts */
			nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + size + SCAN_START_SIZE);
			DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %d (req: %d)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
			frag->next = fragment_freelist;
			fragment_freelist = frag;
2836 * size is already rounded up.
/*
 * Degraded-mode allocation: allocate @size bytes (already aligned) directly
 * in a major-heap section instead of the nursery. degraded_mode accumulates
 * the bytes allocated this way so callers can decide when to leave the mode.
 */
alloc_degraded (MonoVTable *vtable, size_t size)
	GCMemSection *section;
	/* first try to reuse free space in an existing non-nursery section */
	for (section = section_list; section; section = section->next) {
		if (section != nursery_section && (section->end_data - section->next_data) >= size) {
			p = section->next_data;
	/* no section had room: grow the heap with a new section */
	section = alloc_section (nursery_section->size * 4);
	/* FIXME: handle OOM */
	p = section->next_data;
	section->next_data += size;
	degraded_mode += size;
	DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %d in section %p\n", p, vtable, vtable->klass->name, size, section));
2862 * Provide a variant that takes just the vtable for small fixed-size objects.
2863 * The aligned size is already computed and stored in vt->gc_descr.
2864 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
2865 * processing. We can keep track of where objects start, for example,
2866 * so when we scan the thread stacks for pinned objects, we can start
2867 * a search for the pinned object in SCAN_START_SIZE chunks.
/*
 * Allocate a managed object of @size bytes with the given vtable.
 * Fast path: bump-pointer allocation in the current nursery fragment.
 * Slow paths: large-object space, a new fragment, degraded mode, or a
 * minor collection/heap expansion.
 */
mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
	/* FIXME: handle OOM */
	/* round the size up to the allocation alignment */
	size += ALLOC_ALIGN - 1;
	size &= ~(ALLOC_ALIGN - 1);
	g_assert (vtable->gc_descr);
	p = (void**)nursery_next;
	/* FIXME: handle overflow */
	nursery_next += size;
	if (nursery_next >= nursery_temp_end) {
		/* there are two cases: the object is too big or we need to collect */
		/* there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking again the GC lock when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (size > MAX_SMALL_OBJ_SIZE) {
			/* get ready for possible collection */
			update_current_thread_stack (&dummy);
			/* undo the speculative bump and go to the large object space */
			nursery_next -= size;
			p = alloc_large_inner (vtable, size);
			if (nursery_next >= nursery_frag_real_end) {
				nursery_next -= size;
				/* when running in degraded mode, we continue allocing that way
				 * for a while, to decrease the number of useless nursery collections.
				 */
				if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
					p = alloc_degraded (vtable, size);
				if (!search_fragment_for_size (size)) {
					/* get ready for possible collection */
					update_current_thread_stack (&dummy);
					minor_collect_or_expand_inner (size);
					if (degraded_mode) {
						p = alloc_degraded (vtable, size);
				/* nursery_next changed by minor_collect_or_expand_inner () */
				p = (void*)nursery_next;
				nursery_next += size;
				if (nursery_next > nursery_temp_end) {
		/* record the scan start so we can find pinned objects more easily */
		nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = p;
		/* we just bump nursery_temp_end as well */
		nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + SCAN_START_SIZE);
		DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", nursery_next, nursery_temp_end));
	DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size));
2943 * To be used for interned strings and possibly MonoThread, reflection handles.
2944 * We may want to explicitly free these objects.
/*
 * Allocate a pinned (non-moving) object, e.g. for interned strings.
 * Small requests come from the pinned freelist; anything above
 * MAX_FREELIST_SIZE goes to the LOS, which never moves objects anyway.
 */
mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
	/* FIXME: handle OOM */
	size += ALLOC_ALIGN - 1;
	size &= ~(ALLOC_ALIGN - 1);
	if (size > MAX_FREELIST_SIZE) {
		update_current_thread_stack (&p);
		/* large objects are always pinned anyway */
		p = alloc_large_inner (vtable, size);
	p = alloc_from_freelist (size);
	/* freelist memory may be recycled, so it must be cleared explicitly */
	memset (p, 0, size);
	DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size));
2969 * ######################################################################
2970 * ######## Finalization support
2971 * ######################################################################
2975 * this is valid for the nursery: if the object has been forwarded it means it's
2976 * still referenced from a root. If it is pinned it's still alive as well.
2977 * Return TRUE if @obj is ready to be finalized.
2979 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
/*
 * Walk the finalizable hash for objects inside [@start, @end): queue the ones
 * that are ready (dead) on fin_ready_list, and for survivors update the stored
 * pointer since the object may have been copied.
 */
finalize_in_range (void **start, void **end)
	FinalizeEntry *entry, *prev;
	for (i = 0; i < finalizable_hash_size; ++i) {
		for (entry = finalizable_hash [i]; entry;) {
			if (entry->object >= start && entry->object < end) {
				if (object_is_fin_ready (entry->object)) {
					FinalizeEntry *next;
					/* remove and put in fin_ready_list */
					prev->next = entry->next;
					finalizable_hash [i] = entry->next;
					num_ready_finalizers++;
					num_registered_finalizers--;
					entry->next = fin_ready_list;
					fin_ready_list = entry;
					/* Make it survive */
					from = entry->object;
					/* copy so the finalizer can still run against a live object */
					entry->object = copy_object (entry->object, start, end);
					DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
			/* update pointer */
			DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
			entry->object = copy_object (entry->object, start, end);
			entry = entry->next;
/*
 * Process disappearing links whose target lies in [@start, @end): null the
 * link and drop the entry when the target died, otherwise update both the
 * entry and the user-visible link with the (possibly moved) object.
 */
null_link_in_range (void **start, void **end)
	FinalizeEntry *entry, *prev;
	for (i = 0; i < disappearing_link_hash_size; ++i) {
		for (entry = disappearing_link_hash [i]; entry;) {
			if (entry->object >= start && entry->object < end) {
				if (object_is_fin_ready (entry->object)) {
					void **p = entry->data;
					/* remove from list */
					prev->next = entry->next;
					disappearing_link_hash [i] = entry->next;
					DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, entry->object));
					free_internal_mem (entry);
				/* update pointer if it's moved
				 * FIXME: what if an object is moved earlier?
				 */
				entry->object = copy_object (entry->object, start, end);
				DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->data, entry->object));
				/* write the new location back through the user's link */
				*link = entry->object;
			entry = entry->next;
3064 * mono_gc_finalizers_for_domain:
3065 * @domain: the unloading appdomain
3066 * @out_array: output array
3067 * @out_size: size of output array
3069 * Store inside @out_array up to @out_size objects that belong to the unloading
3070 * appdomain @domain. Returns the number of stored items. Can be called repeteadly
3071 * until it returns 0.
3072 * The items are removed from the finalizer data structure, so the caller is supposed
3074 * @out_array should be on the stack to allow the GC to know the objects are still alive.
/*
 * mono_gc_finalizers_for_domain:
 * Collect up to @out_size finalizable objects belonging to @domain into
 * @out_array, removing them from the finalizable hash. Returns the number
 * stored; call repeatedly until it returns 0. @out_array should live on the
 * stack so the collected objects stay conservatively pinned.
 */
mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
	FinalizeEntry *entry, *prev;
	if (no_finalize || !out_size || !out_array)
	for (i = 0; i < finalizable_hash_size; ++i) {
		for (entry = finalizable_hash [i]; entry;) {
			if (mono_object_domain (entry->object) == domain) {
				FinalizeEntry *next;
				/* remove and put in out_array */
				prev->next = entry->next;
				finalizable_hash [i] = entry->next;
				num_registered_finalizers--;
				out_array [count ++] = entry->object;
				DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
				/* output array full: stop here, caller will call again */
				if (count == out_size) {
			entry = entry->next;
/*
 * Grow the finalizable hash table to a prime size near the current number of
 * registered finalizers and rehash every entry into it.
 */
rehash_fin_table (void)
	FinalizeEntry **new_hash;
	FinalizeEntry *entry, *next;
	int new_size = g_spaced_primes_closest (num_registered_finalizers);
	/* get_internal_mem returns zeroed memory, so buckets start out NULL */
	new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
	for (i = 0; i < finalizable_hash_size; ++i) {
		for (entry = finalizable_hash [i]; entry; entry = next) {
			hash = mono_object_hash (entry->object) % new_size;
			entry->next = new_hash [hash];
			new_hash [hash] = entry;
	free_internal_mem (finalizable_hash);
	finalizable_hash = new_hash;
	finalizable_hash_size = new_size;
/*
 * Register (or, when already present, update/deregister) a finalizer callback
 * for @obj. A matching existing entry is either updated with the new
 * @user_data or removed; otherwise a new entry is added to the hash.
 */
mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
	FinalizeEntry *entry, *prev;
	hash = mono_object_hash (obj);
	/* keep the load factor below 2 entries per bucket */
	if (num_registered_finalizers >= finalizable_hash_size * 2)
		rehash_fin_table ();
	hash %= finalizable_hash_size;
	for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
		if (entry->object == obj) {
			entry->data = user_data;
			/* remove from the list */
			prev->next = entry->next;
			finalizable_hash [hash] = entry->next;
			num_registered_finalizers--;
			DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
			free_internal_mem (entry);
	/* request to deregister, but already out of the list */
	entry = get_internal_mem (sizeof (FinalizeEntry));
	entry->object = obj;
	entry->data = user_data;
	entry->next = finalizable_hash [hash];
	finalizable_hash [hash] = entry;
	num_registered_finalizers++;
	DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
/*
 * Grow the disappearing-link hash table (keyed by the link address, not the
 * object) and rehash all entries. Mirrors rehash_fin_table.
 */
rehash_dislink (void)
	FinalizeEntry **new_hash;
	FinalizeEntry *entry, *next;
	int new_size = g_spaced_primes_closest (num_disappearing_links);
	new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
	for (i = 0; i < disappearing_link_hash_size; ++i) {
		for (entry = disappearing_link_hash [i]; entry; entry = next) {
			/* dislinks hash on the link location (entry->data) */
			hash = mono_aligned_addr_hash (entry->data) % new_size;
			entry->next = new_hash [hash];
			new_hash [hash] = entry;
	free_internal_mem (disappearing_link_hash);
	disappearing_link_hash = new_hash;
	disappearing_link_hash_size = new_size;
/*
 * Register a disappearing link: *@link will be set to NULL when @obj is
 * collected. Passing the same @link again changes its target; a NULL @obj
 * removes the registration.
 */
mono_gc_register_disappearing_link (MonoObject *obj, void *link)
	FinalizeEntry *entry, *prev;
	if (num_disappearing_links >= disappearing_link_hash_size * 2)
	/* FIXME: add check that link is not in the heap */
	hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
	entry = disappearing_link_hash [hash];
	for (; entry; entry = entry->next) {
		/* link already added */
		if (link == entry->data) {
			/* NULL obj means remove */
			prev->next = entry->next;
			disappearing_link_hash [hash] = entry->next;
			num_disappearing_links--;
			DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
			free_internal_mem (entry);
			entry->object = obj; /* we allow the change of object */
	/* not found: add a fresh entry to the bucket */
	entry = get_internal_mem (sizeof (FinalizeEntry));
	entry->object = obj;
	entry->next = disappearing_link_hash [hash];
	disappearing_link_hash [hash] = entry;
	num_disappearing_links++;
	DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
/*
 * Drain fin_ready_list, invoking each queued finalizer callback.
 * The object is loaded into a local before the call so the conservative
 * stack scan keeps it pinned while the finalizer runs.
 */
mono_gc_invoke_finalizers (void)
	FinalizeEntry *entry;
	/* FIXME: batch to reduce lock contention */
	while (fin_ready_list) {
		entry = fin_ready_list;
		fin_ready_list = entry->next;
		num_ready_finalizers--;
		obj = entry->object;
		DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
		void (*callback)(void *, void*) = entry->data;
		obj = entry->object;
		/* the object is on the stack so it is pinned */
		/*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
		callback (obj, NULL);
		free_internal_mem (entry);
/* Return TRUE if there are finalizers queued and ready to run. */
mono_gc_pending_finalizers (void)
	return fin_ready_list != NULL;
/* Negative value to remove */
mono_gc_add_memory_pressure (gint64 value)
	/* FIXME: Use interlocked functions */
	/* NOTE(review): plain read-modify-write; not thread-safe as-is (see FIXME) */
	memory_pressure += value;
3298 * ######################################################################
3299 * ######## registered roots support
3300 * ######################################################################
3308 RootRecord **new_hash;
3309 RootRecord *entry, *next;
3310 int new_size = g_spaced_primes_closest (num_roots_entries);
3312 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3313 for (i = 0; i < roots_hash_size; ++i) {
3314 for (entry = roots_hash [i]; entry; entry = next) {
3315 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3317 entry->next = new_hash [hash];
3318 new_hash [hash] = entry;
3321 free_internal_mem (roots_hash);
3322 roots_hash = new_hash;
3323 roots_hash_size = new_size;
3327 * We do not coalesce roots.
/*
 * Register the memory range [@start, @start + @size) as a GC root with the
 * given scan descriptor. Re-registering the same @start updates the size and
 * descriptor in place (used for thread statics etc.). Roots are not coalesced.
 */
mono_gc_register_root (char *start, size_t size, void *descr)
	RootRecord *new_root;
	unsigned int hash = mono_aligned_addr_hash (start);
	if (num_roots_entries >= roots_hash_size * 2)
	hash %= roots_hash_size;
	for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) {
		/* we allow changing the size and the descriptor (for thread statics etc) */
		if (new_root->start_root == start) {
			size_t old_size = new_root->end_root - new_root->start_root;
			new_root->end_root = new_root->start_root + size;
			new_root->root_desc = (mword)descr;
			/* roots_size tracks total registered bytes; swap old for new */
			roots_size -= old_size;
	/* fresh root: allocate a record and link it into the bucket */
	new_root = get_internal_mem (sizeof (RootRecord));
	new_root->start_root = start;
	new_root->end_root = new_root->start_root + size;
	new_root->root_desc = (mword)descr;
	num_roots_entries++;
	new_root->next = roots_hash [hash];
	roots_hash [hash] = new_root;
	DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * Remove the root record whose range starts at @addr, if any, and free it.
 */
mono_gc_deregister_root (char* addr)
	RootRecord *tmp, *prev = NULL;
	unsigned int hash = mono_aligned_addr_hash (addr);
	hash %= roots_hash_size;
	tmp = roots_hash [hash];
	if (tmp->start_root == (char*)addr) {
		/* unlink from the bucket chain (head or interior) */
		prev->next = tmp->next;
		roots_hash [hash] = tmp->next;
		roots_size -= (tmp->end_root - tmp->start_root);
		num_roots_entries--;
		DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
		free_internal_mem (tmp);
3395 * ######################################################################
3396 * ######## Thread handling (stop/start code)
3397 * ######################################################################
/* Per-thread GC bookkeeping: one record per registered mutator thread,
 * chained into a small fixed-size hash table keyed by the thread id. */
/* eventually share with MonoThread? */
typedef struct _SgenThreadInfo SgenThreadInfo;
struct _SgenThreadInfo {
	SgenThreadInfo *next;
	ARCH_THREAD_TYPE id;
	unsigned int stop_count; /* to catch duplicate signals */
	RememberedSet **remset;
/* FIXME: handle large/small config */
#define THREAD_HASH_SIZE 11
/* multiplicative hash of the thread id; >>4 drops low bits that are
 * presumably constant due to allocation alignment of pthread_t values */
#define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
3420 #if USE_SIGNAL_BASED_START_STOP_WORLD
3422 static sem_t suspend_ack_semaphore;
3423 static unsigned int global_stop_count = 0;
3424 static int suspend_signal_num = SIGPWR;
3425 static int restart_signal_num = SIGXCPU;
3426 static sigset_t suspend_signal_mask;
3427 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
3429 /* LOCKING: assumes the GC lock is held */
/* LOCKING: assumes the GC lock is held */
/* Look up the SgenThreadInfo for thread @id in the thread hash table. */
static SgenThreadInfo*
thread_info_lookup (ARCH_THREAD_TYPE id)
	unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
	SgenThreadInfo *info;
	info = thread_table [hash];
	while (info && !ARCH_THREAD_EQUALS (info->id, id)) {
/*
 * Record the current thread's live stack top and spill its registers into
 * cur_thread_regs so a collection triggered from this thread can scan them
 * conservatively.
 */
update_current_thread_stack (void *start)
	void *ptr = cur_thread_regs;
	SgenThreadInfo *info = thread_info_lookup (ARCH_GET_THREAD ());
	/* use a local's address as the (aligned) stack top */
	info->stack_start = align_pointer (&ptr);
	ARCH_STORE_REGS (ptr);
/* Return a human-readable name for the suspend/restart signals (debug output). */
signal_desc (int signum)
	if (signum == suspend_signal_num)
	if (signum == restart_signal_num)
3462 /* LOCKING: assumes the GC lock is held */
/* LOCKING: assumes the GC lock is held */
/*
 * Send @signum (suspend or restart) to every registered thread except the
 * caller, then wait on the ack semaphore until each signalled thread has
 * responded. Returns the number of threads signalled.
 */
thread_handshake (int signum)
	int count, i, result;
	SgenThreadInfo *info;
	pthread_t me = pthread_self ();
	for (i = 0; i < THREAD_HASH_SIZE; ++i) {
		for (info = thread_table [i]; info; info = info->next) {
			DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
			if (ARCH_THREAD_EQUALS (info->id, me)) {
				DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
			/*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
			result = pthread_kill (info->id, signum);
			DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
			DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
	/* wait for one ack per successfully signalled thread;
	 * sem_wait is retried on EINTR */
	for (i = 0; i < count; ++i) {
		while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
			if (errno != EINTR) {
				g_error ("sem_wait ()");
3501 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
/*
 * Signal handler run in each mutator thread when the world is being stopped.
 * Saves the thread's stack top and remset, acks the stopping thread, then
 * blocks in sigsuspend until the restart signal arrives, and acks again.
 * errno is preserved across the handler.
 */
suspend_handler (int sig)
	SgenThreadInfo *info;
	int old_errno = errno;
	id = pthread_self ();
	info = thread_info_lookup (id);
	stop_count = global_stop_count;
	/* duplicate signal */
	/* NOTE(review): the duplicate-signal check is deliberately disabled ("0 &&") */
	if (0 && info->stop_count == stop_count) {
	/* update the remset info in the thread data structure */
	info->remset = remembered_set;
	/* this includes the register values that the kernel put on the stack.
	 * Write arch-specific code to only push integer regs and a more accurate
	 */
	info->stack_start = align_pointer (&id);
	/* notify the waiting thread */
	sem_post (&suspend_ack_semaphore);
	info->stop_count = stop_count;
	/* wait until we receive the restart signal */
	do {
		sigsuspend (&suspend_signal_mask);
	} while (info->signal != restart_signal_num);
	/* notify the waiting thread */
	sem_post (&suspend_ack_semaphore);
/*
 * Signal handler for the restart signal: mark the thread's info so the
 * sigsuspend loop in suspend_handler exits. Preserves errno.
 */
restart_handler (int sig)
	SgenThreadInfo *info;
	int old_errno = errno;
	info = thread_info_lookup (pthread_self ());
	info->signal = restart_signal_num;
3555 static TV_DECLARE (stop_world_time);
3556 static unsigned long max_pause_usec = 0;
3558 /* LOCKING: assumes the GC lock is held */
3564 global_stop_count++;
3565 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (ARCH_GET_THREAD ()), (gpointer)ARCH_GET_THREAD ()));
3566 TV_GETTIME (stop_world_time);
3567 count = thread_handshake (suspend_signal_num);
3568 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
3572 /* LOCKING: assumes the GC lock is held */
3574 restart_world (void)
3577 TV_DECLARE (end_sw);
3580 count = thread_handshake (restart_signal_num);
3581 TV_GETTIME (end_sw);
3582 usec = TV_ELAPSED (stop_world_time, end_sw);
3583 max_pause_usec = MAX (usec, max_pause_usec);
3584 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
3588 #endif /* USE_SIGNAL_BASED_START_STOP_WORLD */
3591 * Identify objects pinned in a thread stack and its registers.
/*
 * Conservatively pin nursery objects referenced from each registered thread's
 * stack and from the saved register buffer of the collecting thread.
 */
pin_thread_data (void *start_nursery, void *end_nursery)
	SgenThreadInfo *info;
	for (i = 0; i < THREAD_HASH_SIZE; ++i) {
		for (info = thread_table [i]; info; info = info->next) {
			DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
			DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
			conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
	DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n"));
	conservatively_pin_objects_from (cur_thread_regs, cur_thread_regs + ARCH_NUM_REGS, start_nursery, end_nursery);
/*
 * Debug helper: scan all thread stacks word-by-word looking for a reference
 * into [obj, obj+size) and report where the pinning reference came from.
 */
find_pinning_ref_from_thread (char *obj, size_t size)
	SgenThreadInfo *info;
	char *endobj = obj + size;
	for (i = 0; i < THREAD_HASH_SIZE; ++i) {
		for (info = thread_table [i]; info; info = info->next) {
			char **start = (char**)info->stack_start;
			while (start < (char**)info->stack_end) {
				if (*start >= obj && *start < endobj) {
					DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, info->id, start, info->stack_start, info->stack_end));
	/* FIXME: check register */
/* return TRUE if ptr points inside the managed heap */
ptr_in_heap (void* ptr)
	mword p = (mword)ptr;
	/* quick range test against the recorded heap boundaries */
	if (p < lowest_heap_address || p >= highest_heap_address)
	/* FIXME: more checks */
/*
 * Process one remembered-set entry at @p: copy the referenced nursery
 * object(s) and, for non-global remsets, re-register locations that still
 * point into the nursery. The entry type is encoded in the low bits
 * (REMSET_TYPE_MASK). Returns the address past the consumed entry (the
 * caller advances through the buffer with it).
 */
handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
	/* FIXME: exclude stack locations */
	switch ((*p) & REMSET_TYPE_MASK) {
	case REMSET_LOCATION:
		if ((ptr < start_nursery || ptr >= end_nursery) && ptr_in_heap (ptr)) {
			*ptr = copy_object (*ptr, start_nursery, end_nursery);
			DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
			/* still pointing into the nursery: keep it in the global remset */
			if (!global && *ptr >= start_nursery && *ptr < end_nursery)
				add_to_global_remset (ptr);
		DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
		/* range entry: a run of consecutive slots, count follows in the buffer */
		ptr = (void**)(*p & ~REMSET_TYPE_MASK);
		if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr))
		while (count-- > 0) {
			*ptr = copy_object (*ptr, start_nursery, end_nursery);
			DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
			if (!global && *ptr >= start_nursery && *ptr < end_nursery)
				add_to_global_remset (ptr);
		/* object entry: scan the whole object for nursery references */
		ptr = (void**)(*p & ~REMSET_TYPE_MASK);
		if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr))
		scan_object (*ptr, start_nursery, end_nursery);
	g_assert_not_reached ();
/*
 * Scan all remembered sets (the global one plus each thread's) during a
 * minor collection, then reset the per-thread buffers and free all but each
 * thread's head remset.
 */
scan_from_remsets (void *start_nursery, void *end_nursery)
	SgenThreadInfo *info;
	RememberedSet *remset, *next;
	/* the global one */
	for (remset = global_remset; remset; remset = remset->next) {
		DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %d\n", remset->data, remset->store_next, remset->store_next - remset->data));
		/* handle_remset returns the pointer past the consumed entry */
		for (p = remset->data; p < remset->store_next;) {
			p = handle_remset (p, start_nursery, end_nursery, TRUE);
	/* the per-thread ones */
	for (i = 0; i < THREAD_HASH_SIZE; ++i) {
		for (info = thread_table [i]; info; info = info->next) {
			for (remset = info->remset; remset; remset = next) {
				DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %d\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
				for (p = remset->data; p < remset->store_next;) {
					p = handle_remset (p, start_nursery, end_nursery, FALSE);
				/* empty the buffer; free every remset except the thread's head */
				remset->store_next = remset->data;
				next = remset->next;
				remset->next = NULL;
				if (remset != info->remset) {
					DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
					free_internal_mem (remset);
/*
 * Clear the info in the remembered sets: we're doing a major collection, so
 * the per-thread ones are not needed and the global ones will be reconstructed
 */
clear_remsets (void)
	SgenThreadInfo *info;
	RememberedSet *remset, *next;
	/* the global list */
	for (remset = global_remset; remset; remset = next) {
		remset->store_next = remset->data;
		next = remset->next;
		remset->next = NULL;
		/* keep only the head remset, free the overflow buffers */
		if (remset != global_remset) {
			DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
			free_internal_mem (remset);
	/* the per-thread ones */
	for (i = 0; i < THREAD_HASH_SIZE; ++i) {
		for (info = thread_table [i]; info; info = info->next) {
			for (remset = info->remset; remset; remset = next) {
				remset->store_next = remset->data;
				next = remset->next;
				remset->next = NULL;
				if (remset != info->remset) {
					DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
					free_internal_mem (remset);
/* LOCKING: assumes the GC lock is held */
/*
 * Create and register an SgenThreadInfo for the calling thread: determine the
 * stack bounds, insert the record into the thread hash, and give the thread a
 * fresh remembered set (also published via the `remembered_set` TLS-like
 * variable). @addr is used as a stack-bottom fallback.
 */
static SgenThreadInfo*
gc_register_current_thread (void *addr)
	SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
	info->id = ARCH_GET_THREAD ();
	info->stop_count = -1;
	info->stack_start = NULL;
	/* try to get it with attributes first */
#if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
	pthread_attr_t attr;
	pthread_getattr_np (pthread_self (), &attr);
	pthread_attr_getstack (&attr, &sstart, &size);
	info->stack_end = (char*)sstart + size;
#elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
	info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
	/* FIXME: we assume the stack grows down */
	/* fallback: round the caller-provided address up to a 4K page boundary */
	gsize stack_bottom = (gsize)addr;
	stack_bottom += 4095;
	stack_bottom &= ~4095;
	info->stack_end = (char*)stack_bottom;
	/* hash into the table */
	hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
	info->next = thread_table [hash];
	thread_table [hash] = info;
	remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
	DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
/*
 * Remove the calling thread's SgenThreadInfo from the thread hash table.
 */
unregister_current_thread (void)
	SgenThreadInfo *prev = NULL;
	ARCH_THREAD_TYPE id = ARCH_GET_THREAD ();
	hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
	p = thread_table [hash];
	DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
	/* walk the bucket chain to find this thread's record */
	while (!ARCH_THREAD_EQUALS (p->id, id)) {
	thread_table [hash] = p->next;
	prev->next = p->next;
	/* FIXME: transfer remsets if any */
/*
 * Public entry point: register the current thread with the GC if it isn't
 * already. Returns non-zero on success.
 */
mono_gc_register_thread (void *baseptr)
	SgenThreadInfo *info;
	info = thread_info_lookup (ARCH_GET_THREAD ());
	info = gc_register_current_thread (baseptr);
	return info != NULL;
3847 #if USE_PTHREAD_INTERCEPT
3849 #undef pthread_create
3851 #undef pthread_detach
3854 void *(*start_routine) (void *);
3858 } SgenThreadStartInfo;
/*
 * Trampoline installed by mono_gc_pthread_create: registers the new thread
 * with the GC, signals the creator that registration is done, runs the user's
 * start routine, and unregisters on exit.
 */
gc_start_thread (void *arg)
	SgenThreadStartInfo *start_info = arg;
	SgenThreadInfo* info;
	/* copy out what we need before posting: the creator frees start_info */
	void *t_arg = start_info->arg;
	void *(*start_func) (void*) = start_info->start_routine;
	info = gc_register_current_thread (&result);
	sem_post (&(start_info->registered));
	result = start_func (t_arg);
	unregister_current_thread ();
/*
 * pthread_create interposer: wrap the user's start routine in gc_start_thread
 * so the child registers with the GC, and block (retrying sem_wait on EINTR)
 * until that registration has happened before returning.
 */
mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
	SgenThreadStartInfo *start_info;
	start_info = malloc (sizeof (SgenThreadStartInfo));
	sem_init (&(start_info->registered), 0, 0);
	start_info->arg = arg;
	start_info->start_routine = start_routine;
	result = pthread_create (new_thread, attr, gc_start_thread, start_info);
	while (sem_wait (&(start_info->registered)) != 0) {
		/*if (EINTR != errno) ABORT("sem_wait failed"); */
	sem_destroy (&(start_info->registered));
/* Thin pass-through wrappers for the pthread interposition layer. */
mono_gc_pthread_join (pthread_t thread, void **retval)
	return pthread_join (thread, retval);
mono_gc_pthread_detach (pthread_t thread)
	return pthread_detach (thread);
3917 #endif /* USE_PTHREAD_INTERCEPT */
3920 * ######################################################################
3921 * ######## Write barriers
3922 * ######################################################################
/*
 * Allocate a remembered-set buffer with room for @size entries; @id is only
 * used for debug output. store_next is the bump pointer, end_set the limit.
 */
static RememberedSet*
alloc_remset (int size, gpointer id) {
	RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
	res->store_next = res->data;
	res->end_set = res->data + size;
	DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
3936 * Note: the write barriers first do the needed GC work and then do the actual store:
3937 * this way the value is visible to the conservative GC scan after the write barrier
3938 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
3939 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
3940 * happens when we need to record which pointers contain references to the new generation.
3941 * The write barrier will be executed, but the pointer is still not stored.
/*
 * Write barrier for object field stores: record @field_ptr in the thread's
 * remembered set before storing, unless the field itself lives in the nursery
 * (then no record is needed). Overflowing remsets get a new buffer chained in.
 */
mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
	if (field_ptr >= nursery_start && field_ptr < nursery_real_end) {
		/* nursery-to-anything stores need no remembering */
		*(void**)field_ptr = value;
	DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
	rs = remembered_set;
	if (rs->store_next < rs->end_set) {
		*(rs->store_next++) = (mword)field_ptr;
		*(void**)field_ptr = value;
	/* current buffer full: chain a new one of the same capacity */
	rs = alloc_remset (rs->end_set - rs->data, (void*)1);
	rs->next = remembered_set;
	remembered_set = rs;
	thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
	*(rs->store_next++) = (mword)field_ptr;
	*(void**)field_ptr = value;
/*
 * Write barrier for array element stores; identical structure to
 * mono_gc_wbarrier_set_field, keyed on the slot address.
 */
mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
	RememberedSet *rs = remembered_set;
	if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end) {
		*(void**)slot_ptr = value;
	DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
	if (rs->store_next < rs->end_set) {
		*(rs->store_next++) = (mword)slot_ptr;
		*(void**)slot_ptr = value;
	/* remset full: chain a fresh buffer and record there */
	rs = alloc_remset (rs->end_set - rs->data, (void*)1);
	rs->next = remembered_set;
	remembered_set = rs;
	thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
	*(rs->store_next++) = (mword)slot_ptr;
	*(void**)slot_ptr = value;
/*
 * Write barrier for bulk array-reference copies: records a REMSET_RANGE entry
 * (start slot tagged with the type bits, followed by the element count), which
 * consumes two remset words — hence the store_next + 1 capacity check.
 */
mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
	RememberedSet *rs = remembered_set;
	if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end)
	DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
	if (rs->store_next + 1 < rs->end_set) {
		*(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
		*(rs->store_next++) = count;
	rs = alloc_remset (rs->end_set - rs->data, (void*)1);
	rs->next = remembered_set;
	remembered_set = rs;
	thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
	*(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
	*(rs->store_next++) = count;
/*
 * mono_gc_wbarrier_generic_store:
 * Write barrier for a reference store through an arbitrary pointer @ptr
 * (no enclosing object known to the caller).  Same nursery fast path and
 * remset-overflow handling as the field/arrayref barriers.
 * NOTE(review): closing braces / else lines are missing from this excerpt.
 */
4009 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4011 RememberedSet *rs = remembered_set;
/* Nursery destination: store directly, no remset entry. */
4012 if (ptr >= nursery_start && ptr < nursery_real_end) {
4013 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4014 *(void**)ptr = value;
4017 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", ptr));
4018 /* FIXME: ensure it is on the heap */
4019 if (rs->store_next < rs->end_set) {
4020 *(rs->store_next++) = (mword)ptr;
4021 *(void**)ptr = value;
/* Remset full: chain a new one of equal capacity and record there. */
4024 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4025 rs->next = remembered_set;
4026 remembered_set = rs;
4027 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4028 *(rs->store_next++) = (mword)ptr;
4029 *(void**)ptr = value;
/*
 * mono_gc_wbarrier_value_copy:
 * Write barrier for copying @count value-type instances of class @klass
 * from @src to @dest (value types may embed GC references).
 * NOTE(review): most of this function's body is missing from the excerpt;
 * only the nursery check and a debug trace are visible, so the remset
 * registration logic cannot be documented from here.
 */
4033 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
/* Nursery destination: no remset entry needed. */
4035 if (dest >= nursery_start && dest < nursery_real_end) {
4038 DEBUG (1, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
4042 * mono_gc_wbarrier_object:
4044 * Write barrier to call when obj is the result of a clone or copy of an object.
/*
 * mono_gc_wbarrier_object:
 * Write barrier to call after @obj has been produced by a clone/copy: every
 * reference field of the whole object may have changed, so a single entry
 * tagged REMSET_OBJECT is recorded for the object as a whole rather than
 * one entry per field.
 * NOTE(review): closing braces / else lines are missing from this excerpt.
 */
4047 mono_gc_wbarrier_object (MonoObject* obj)
4049 RememberedSet *rs = remembered_set;
4050 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
4051 if (rs->store_next < rs->end_set) {
4052 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
/* Remset full: chain a new one of equal capacity and record there. */
4055 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4056 rs->next = remembered_set;
4057 remembered_set = rs;
4058 thread_info_lookup (ARCH_GET_THREAD ())->remset = rs;
4059 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4063 * ######################################################################
4064 * ######## Other mono public interface functions.
4065 * ######################################################################
/*
 * mono_gc_collect:
 * Trigger a collection of the given @generation: 0 runs a nursery (minor)
 * collection, anything else a major collection.
 */
4069 mono_gc_collect (int generation)
/* Record the current stack extent for root scanning; the address of the
 * local @generation serves as an approximation of the stack top. */
4072 update_current_thread_stack (&generation);
4074 if (generation == 0) {
4075 collect_nursery (0);
4077 major_collection ();
/*
 * mono_gc_max_generation:
 * Returns the highest generation number supported by the collector.
 * NOTE(review): the body is missing from this excerpt.
 */
4084 mono_gc_max_generation (void)
/*
 * mono_gc_collection_count:
 * Returns the number of collections performed so far for @generation:
 * minor-GC count for generation 0, major-GC count otherwise.
 */
4090 mono_gc_collection_count (int generation)
4092 if (generation == 0)
4093 return num_minor_gcs;
4094 return num_major_gcs;
/*
 * mono_gc_get_used_size:
 * Returns an approximation of the memory currently in use: the large-object
 * space usage plus the filled portion (next_data - data) of every heap
 * section.  Pinned objects are not accounted for (see FIXME below).
 */
4098 mono_gc_get_used_size (void)
4101 GCMemSection *section;
4103 tot = los_memory_usage;
4104 for (section = section_list; section; section = section->next) {
4105 /* this is approximate... */
4106 tot += section->next_data - section->data;
4108 /* FIXME: account for pinned objects */
/*
 * NOTE(review): the following group of small public interface functions is
 * skeletal in this excerpt — most bodies are missing (the embedded line
 * numbers jump) — so only their visible behavior is documented.
 */
/* mono_gc_get_heap_size: total heap size (body not visible here). */
4114 mono_gc_get_heap_size (void)
/* mono_gc_disable: inhibit collections (body not visible here). */
4120 mono_gc_disable (void)
/* mono_gc_enable: re-enable collections (body not visible here). */
4128 mono_gc_enable (void)
/* mono_object_is_alive: liveness query for @o (body not visible here). */
4136 mono_object_is_alive (MonoObject* o)
/* mono_gc_get_generation: generation of @obj — objects whose address lies
 * inside the nursery are generation 0 (the return statements are missing
 * from this excerpt). */
4142 mono_gc_get_generation (MonoObject *obj)
4144 if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end)
/* mono_gc_enable_events: enable GC profiler events (body not visible). */
4150 mono_gc_enable_events (void)
/* mono_gc_weak_link_add: register @link_addr as a weak reference to @obj,
 * implemented via the disappearing-link machinery. */
4155 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
4157 mono_gc_register_disappearing_link (obj, link_addr);
/* mono_gc_weak_link_remove: unregister the weak link by passing a NULL
 * object for the same address. */
4162 mono_gc_weak_link_remove (void **link_addr)
4164 mono_gc_register_disappearing_link (NULL, link_addr);
/* mono_gc_weak_link_get: read the weak link target (body not visible). */
4169 mono_gc_weak_link_get (void **link_addr)
/*
 * mono_gc_make_descr_from_bitmap:
 * Build a root descriptor from a reference bitmap of @numbits bits.  When
 * the bitmap fits into a single word alongside the descriptor type tag, the
 * bits are packed inline (ROOT_DESC_BITMAP); otherwise the root falls back
 * to conservative scanning.
 * NOTE(review): the return statements are missing from this excerpt.
 */
4175 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
/* The top ROOT_DESC_TYPE_SHIFT bits of the word hold the descriptor type,
 * so only (wordsize*8 - shift) bitmap bits can be packed inline. */
4177 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
4178 mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT);
4181 /* conservative scanning */
4182 DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits));
/*
 * mono_gc_alloc_fixed:
 * Allocate @size bytes of zeroed, non-moving memory outside the GC heap and
 * register it as a GC root described by @descr, so references stored in it
 * keep their targets alive.
 * NOTE(review): the failure-handling and return lines are missing from this
 * excerpt; calloc's result does not appear to be NULL-checked in the
 * visible lines.
 */
4187 mono_gc_alloc_fixed (size_t size, void *descr)
4189 /* FIXME: do a single allocation */
4190 void *res = calloc (1, size);
4193 if (!mono_gc_register_root (res, size, descr)) {
/*
 * mono_gc_free_fixed:
 * Release memory obtained from mono_gc_alloc_fixed(): deregister the root
 * first so the collector stops scanning it (the free() call itself is
 * missing from this excerpt — presumably it follows).
 */
4201 mono_gc_free_fixed (void* addr)
4203 mono_gc_deregister_root (addr);
/*
 * mono_gc_is_gc_thread:
 * Whether the calling thread is registered with the collector, determined
 * by looking the current thread up in the GC's thread-info table.
 */
4208 mono_gc_is_gc_thread (void)
4212 result = thread_info_lookup (ARCH_GET_THREAD ()) != NULL;
/*
 * mono_gc_base_init:
 * One-time initialization of the collector: sets up the GC lock, debug
 * output, the thread suspend/restart signal handlers, the global remembered
 * set, and registers the calling thread.  Safe to call more than once (the
 * gc_initialized flag short-circuits repeat calls).
 * NOTE(review): several lines (returns, braces, unlocks) are missing from
 * this excerpt.
 */
4218 mono_gc_base_init (void)
4221 struct sigaction sinfo;
4223 LOCK_INIT (gc_mutex);
4225 if (gc_initialized) {
4229 gc_initialized = TRUE;
4230 pagesize = mono_pagesize ();
4231 gc_debug_file = stderr;
4232 /* format: MONO_GC_DEBUG=l[,filename] where l is a debug level 0-9 */
4233 if ((env = getenv ("MONO_GC_DEBUG"))) {
4234 if (env [0] >= '0' && env [0] <= '9') {
4235 gc_debug_level = atoi (env);
/* Debug log filename gets a ".<pid>" suffix so concurrent processes do not
 * clobber each other's logs; fall back to stderr if fopen fails. */
4241 char *rf = g_strdup_printf ("%s.%d", env, getpid ());
4242 gc_debug_file = fopen (rf, "wb");
4244 gc_debug_file = stderr;
/* Semaphore used by stopped threads to acknowledge a suspend request. */
4249 sem_init (&suspend_ack_semaphore, 0, 0);
/* Install the suspend and restart signal handlers; all signals are blocked
 * while a handler runs (sigfillset on sa_mask).
 * NOTE(review): SA_SIGINFO is set in sa_flags but the handlers are
 * installed via sa_handler rather than sa_sigaction — verify the handler
 * signatures match what the kernel will deliver. */
4251 sigfillset (&sinfo.sa_mask);
4252 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
4253 sinfo.sa_handler = suspend_handler;
4254 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
4255 g_error ("failed sigaction");
4258 sinfo.sa_handler = restart_handler;
4259 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
4260 g_error ("failed sigaction");
/* While suspended, a thread blocks everything except the restart signal. */
4263 sigfillset (&suspend_signal_mask);
4264 sigdelset (&suspend_signal_mask, restart_signal_num);
4266 global_remset = alloc_remset (1024, NULL);
4267 global_remset->next = NULL;
/* Register the initializing thread itself with the collector. */
4270 mono_gc_register_thread (&sinfo);
4273 #endif /* HAVE_SGEN_GC */