2 * sgen-gc.c: Simple generational GC.
5 * Paolo Molaro (lupus@ximian.com)
7 * Copyright (C) 2005-2006 Novell, Inc
9 * Thread start/stop adapted from Boehm's GC:
10 * Copyright (c) 1994 by Xerox Corporation. All rights reserved.
11 * Copyright (c) 1996 by Silicon Graphics. All rights reserved.
12 * Copyright (c) 1998 by Fergus Henderson. All rights reserved.
13 * Copyright (c) 2000-2004 by Hewlett-Packard Company. All rights reserved.
15 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
16 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
18 * Permission is hereby granted to use or copy this program
19 * for any purpose, provided the above notices are retained on all copies.
20 * Permission to modify the code and to distribute modified code is granted,
21 * provided the above notices are retained, and a notice that the code was
22 * modified is included with the above copyright notice.
24 * All the rest of the code is LGPL.
26 * Important: allocation always provides zeroed memory, having to do
27 * a memset after allocation is deadly for performance.
28 * Memory usage at startup is currently as follows:
30 * 64 KB internal space
32 * We should provide a small memory config with half the sizes
34 * We currently try to make as few mono assumptions as possible:
35 * 1) 2-word header with no GC pointers in it (first vtable, second to store the
37 * 2) gc descriptor is the second word in the vtable (first word in the class)
38 * 3) 8 byte alignment is the minimum and enough (not true for special structures, FIXME)
39 * 4) there is a function to get an object's size and the number of
40 * elements in an array.
41 * 5) we know the special way bounds are allocated for complex arrays
43 * Always try to keep stack usage to a minimum: no recursive behaviour
44 * and no large stack allocs.
46 * General description.
47 * Objects are initially allocated in a nursery using a fast bump-pointer technique.
48 * When the nursery is full we start a nursery collection: this is performed with a
50 * When the old generation is full we start a copying GC of the old generation as well:
51 * this will be changed to mark/compact in the future.
52 * The things that complicate this description are:
53 * *) pinned objects: we can't move them so we need to keep track of them
54 * *) no precise info of the thread stacks and registers: we need to be able to
55 * quickly find the objects that may be referenced conservatively and pin them
56 * (this makes the first issues more important)
57 * *) large objects are too expensive to be dealt with using copying GC: we handle them
58 * with mark/sweep during major collections
59 * *) some objects need to not move even if they are small (interned strings, Type handles):
60 * we use mark/sweep for them, too: they are not allocated in the nursery, but inside
61 * PinnedChunks regions
66 *) change the jit to emit write barrier calls when needed (we
67 can have specialized write barriers): done with icalls, still need to
68 use some specialized barriers
69 *) we could have a function pointer in MonoClass to implement
70 customized write barriers for value types
71 *) the write barrier code could be isolated in a couple of functions: when a
72 thread is stopped if it's inside the barrier it is let go again
73 until we stop outside of them (not really needed, see below GC-safe points)
74 *) investigate the stuff needed to advance a thread to a GC-safe
75 point (single-stepping, read from unmapped memory etc) and implement it
76 Not needed yet: since we treat the objects reachable from the stack/regs as
77 roots, we store the ptr and exec the write barrier so there is no race.
78 We may need this to solve the issue with setting the length of arrays and strings.
79 We may need this also for handling precise info on stacks, even simple things
80 as having uninitialized data on the stack and having to wait for the prolog
81 to zero it. Not an issue for the last frame that we scan conservatively.
82 We could always not trust the value in the slots anyway.
83 *) make the jit info table lock free
84 *) modify the jit to save info about references in stack locations:
85 this can be done just for locals as a start, so that at least
86 part of the stack is handled precisely.
87 *) Make the debug printf stuff thread and signal safe.
88 *) test/fix 64 bit issues
89 *) test/fix endianness issues
91 *) add batch moving profile info
92 *) add more timing info
93 *) there is a possible race when an array or string is created: the vtable is set,
94 but the length is set only later so if the GC needs to scan the object in that window,
95 it won't get the correct size for the object. The object can't have references and it will
96 be pinned, but a free memory fragment may be created that overlaps with it.
97 We should change the array max_length field to be at the same offset as the string length:
98 this way we can have a single special alloc function for them that sets the length.
99 Multi-dim arrays have the same issue for rank == 1 for the bounds data.
100 *) implement a card table as the write barrier instead of remembered sets?
101 *) some sort of blacklist support?
102 *) fin_ready_list is part of the root set, too
103 *) consider lowering the large object min size to 16/32KB or so and benchmark
104 *) once mark-compact is implemented we could still keep the
105 copying collector for the old generation and use it if we think
106 it is better (small heaps and no pinning object in the old
108 *) avoid the memory store from copy_object when not needed.
109 *) optimize the write barriers fastpath to happen in managed code
110 *) add an option to mmap the whole heap in one chunk: it makes for many
111 simplifications in the checks (put the nursery at the top and just use a single
112 check for inclusion/exclusion): the issue this has is that on 32 bit systems it's
113 not flexible (too much of the address space may be used by default or we can't
114 increase the heap as needed) and we'd need a race-free mechanism to return memory
115 back to the system (mprotect(PROT_NONE) will still keep the memory allocated if it
116 was written to, munmap is needed, but the following mmap may not find the same segment
118 *) memzero the fragments after restarting the world and optionally a smaller chunk at a time
119 *) an additional strategy to realloc/expand the nursery when fully pinned is to start
120 allocating objects in the old generation. This means that we can't optimize away write
121 barrier calls in ctors (but that is not valid for other reasons, too).
122 *) add write barriers to the Clone methods
131 #include <semaphore.h>
135 #include <sys/types.h>
136 #include <sys/stat.h>
137 #include <sys/mman.h>
138 #include <sys/time.h>
141 #include "metadata/metadata-internals.h"
142 #include "metadata/class-internals.h"
143 #include "metadata/gc-internal.h"
144 #include "metadata/object-internals.h"
145 #include "metadata/threads.h"
146 #include "metadata/sgen-gc.h"
147 #include "metadata/mono-gc.h"
151 * ######################################################################
152 * ######## Types and constants used by the GC.
153 * ######################################################################
155 #if SIZEOF_VOID_P == 4
156 typedef guint32 mword;
158 typedef guint64 mword;
161 static int gc_initialized = 0;
162 static int gc_debug_level = 0;
163 static FILE* gc_debug_file;
166 mono_gc_flush_info (void)
168 fflush (gc_debug_file);
171 #define MAX_DEBUG_LEVEL 9
/* Run the debug statement `a` only if `level` passes both the compile-time cap
 * (MAX_DEBUG_LEVEL, constant-folded so the compiler can drop the code) and the
 * runtime setting (gc_debug_level). `a` is typically an fprintf to gc_debug_file. */
172 #define DEBUG(level,a) do {if ((level) <= MAX_DEBUG_LEVEL && (level) <= gc_debug_level) a;} while (0)
/* Simple timing helpers built on gettimeofday. */
174 #define TV_DECLARE(name) struct timeval name
175 #define TV_GETTIME(tv) gettimeofday (&(tv), NULL)
/* Elapsed time between two TV_GETTIME samples, in microseconds, truncated to int.
 * NOTE(review): the int cast overflows for intervals over ~35 minutes — fine for
 * GC pause timing, but do not reuse for long intervals. */
176 #define TV_ELAPSED(start,end) (int)((((end).tv_sec - (start).tv_sec) * 1000000) + end.tv_usec - start.tv_usec)
178 #define GC_BITS_PER_WORD (sizeof (mword) * 8)
188 /* each request from the OS ends up in a GCMemSection */
189 typedef struct _GCMemSection GCMemSection;
190 struct _GCMemSection {
194 /* pointer where more data could be allocated if it fits */
198 * scan starts is an array of pointers to objects equally spaced in the allocation area
199 * They let us quickly find pinned objects from pinning pointers.
202 /* in major collections indexes in the pin_queue for objects that pin this section */
205 unsigned short num_scan_start;
209 /* large object space struct: 64+ KB */
210 /* we could make this limit much smaller to avoid memcpy copy
211 * and potentially have more room in the GC descriptor: need to measure
212 * This also means that such small OS objects will need to be
213 * allocated in a different way (using pinned chunks).
214 * We may want to put large but smaller than 64k objects in the fixed space
215 * when we move the object from one generation to another (to limit the
216 * pig in the snake effect).
217 * Note: it may be worth to have an optimized copy function, since we can
218 * assume that objects are aligned and have a multiple of 8 size.
219 * FIXME: This structure needs to be a multiple of 8 bytes in size: this is not
220 * true if MONO_ZERO_LEN_ARRAY is nonzero.
222 typedef struct _LOSObject LOSObject;
225 mword size; /* this is the object size */
226 int dummy; /* to have a sizeof (LOSObject) a multiple of ALLOC_ALIGN and data starting at same alignment */
228 char data [MONO_ZERO_LEN_ARRAY];
231 /* Pinned objects are allocated in the LOS space if bigger than half a page
232 * or from freelists otherwise. We assume that pinned objects are relatively few
233 * and they have a slow dying speed (like interned strings, thread objects).
234 * As such they will be collected only at major collections.
235 * free lists are not global: when we need memory we allocate a PinnedChunk.
236 * Each pinned chunk is made of several pages, the first of which is used
237 * internally for bookkeeping (here think of a page as 4KB). The bookkeeping
238 * includes the freelists vectors and info about the object size of each page
239 * in the pinned chunk. So, when needed, a free page is found in a pinned chunk,
240 * a size is assigned to it, the page is divided in the proper chunks and each
241 * chunk is added to the freelist. To not waste space, the remaining space in the
242 * first page is used as objects of size 16 or 32 (need to measure which are more
244 * We use this same structure to allocate memory used internally by the GC, so
245 * we never use malloc/free if we need to alloc during collection: the world is stopped
246 * and malloc/free will deadlock.
247 * When we want to iterate over pinned objects, we just scan a page at a time
248 * linearly according to the size of objects in the page: the next pointer used to link
249 * the items in the freelist uses the same word as the vtable. Since we keep freelists
250 * for each pinned chunk, if the word points outside the pinned chunk it means
252 * We could avoid this expensive scanning in creative ways. We could have a policy
253 * of putting in the pinned space only objects we know about that have no struct fields
254 * with references and we can easily use even an expensive write barrier for them,
255 * since pointer writes on such objects should be rare.
256 * The best compromise is to just alloc interned strings and System.MonoType in them.
257 * It would be nice to allocate MonoThread in it, too: must check that we properly
258 * use write barriers so we don't have to do any expensive scanning of the whole pinned
259 * chunk list during minor collections. We can avoid it now because we alloc in it only
260 * reference-free objects.
262 #define PINNED_FIRST_SLOT_SIZE (sizeof (gpointer) * 4)
263 #define MAX_FREELIST_SIZE 2048
264 #define PINNED_PAGE_SIZE (4096)
265 #define PINNED_CHUNK_MIN_SIZE (4096*8)
266 typedef struct _PinnedChunk PinnedChunk;
267 struct _PinnedChunk {
270 int *page_sizes; /* a 0 means the page is still unused */
273 void *data [1]; /* page sizes and free lists are stored here */
277 * The young generation is divided into fragments. This is because
278 * we can hand one fragment to a thread for lock-less fast alloc and
279 * because the young generation ends up fragmented anyway by pinned objects.
280 * Once a collection is done, a list of fragments is created. When doing
281 * thread local alloc we use smallish nurseries so we allow new threads to
282 * allocate memory from gen0 without triggering a collection. Threads that
283 * are found to allocate lots of memory are given bigger fragments. This
284 * should make the finalizer thread use little nursery memory after a while.
285 * We should start assigning threads very small fragments: if there are many
286 * threads the nursery will be full of reserved space that the threads may not
287 * use at all, slowing down allocation speed.
289 typedef struct _Fragment Fragment;
293 char *fragment_start;
294 char *fragment_limit; /* the current soft limit for allocation */
298 /* the runtime can register areas of memory as roots: we keep two lists of roots,
299 * a pinned root set for conservatively scanned roots and a normal one for
300 * precisely scanned roots (currently implemented as a single list).
302 typedef struct _RootRecord RootRecord;
310 /* for use with write barriers */
311 typedef struct _RememberedSet RememberedSet;
312 struct _RememberedSet {
316 mword data [MONO_ZERO_LEN_ARRAY];
319 /* we have 4 possible values in the low 2 bits */
321 REMSET_LOCATION, /* just a pointer to the exact location */
322 REMSET_RANGE, /* range of pointer fields */
323 REMSET_OBJECT, /* mark all the object for scanning */
324 REMSET_TYPE_MASK = 0x3
327 static __thread RememberedSet *remembered_set MONO_TLS_FAST;
328 static RememberedSet *global_remset;
329 static int store_to_global_remset = 0;
331 /* FIXME: later choose a size that takes into account the RememberedSet struct
332 * and doesn't waste any alloc padding space.
334 #define DEFAULT_REMSET_SIZE 1024
335 static RememberedSet* alloc_remset (int size, gpointer id);
337 /* Structure that corresponds to a MonoVTable: desc is a mword so requires
338 * no cast from a pointer to an integer
345 /* these bits are set in the object vtable: we could merge them since an object can be
346 * either pinned or forwarded but not both.
347 * We store them in the vtable slot because the bits are used in the sync block for
348 * other purposes: if we merge them and alloc the sync blocks aligned to 8 bytes, we can change
349 * this and use bit 3 in the syncblock (with the lower two bits both set for forwarded, that
350 * would be an invalid combination for the monitor and hash code).
351 * The values are already shifted.
352 * The forwarding address is stored in the sync block.
354 #define FORWARDED_BIT 1
356 #define VTABLE_BITS_MASK 0x3
358 /* returns NULL if not forwarded, or the forwarded address */
359 #define object_is_forwarded(obj) (((mword*)(obj))[0] & FORWARDED_BIT? (void*)(((mword*)(obj))[1]): NULL)
360 /* set the forwarded address fw_addr for object obj */
361 #define forward_object(obj,fw_addr) do { \
362 ((mword*)(obj))[0] |= FORWARDED_BIT; \
363 ((mword*)(obj))[1] = (mword)(fw_addr); \
366 #define object_is_pinned(obj) (((mword*)(obj))[0] & PINNED_BIT)
367 #define pin_object(obj) do { \
368 ((mword*)(obj))[0] |= PINNED_BIT; \
370 #define unpin_object(obj) do { \
371 ((mword*)(obj))[0] &= ~PINNED_BIT; \
376 * Since we set bits in the vtable, use the macro to load it from the pointer to
377 * an object that is potentially pinned.
379 #define LOAD_VTABLE(addr) ((*(mword*)(addr)) & ~VTABLE_BITS_MASK)
382 safe_name (void* obj)
384 MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
385 return vt->klass->name;
389 safe_object_get_size (MonoObject* o)
391 MonoClass *klass = ((MonoVTable*)LOAD_VTABLE (o))->klass;
392 if (klass == mono_defaults.string_class) {
393 return sizeof (MonoString) + 2 * mono_string_length ((MonoString*) o) + 2;
394 } else if (klass->rank) {
395 MonoArray *array = (MonoArray*)o;
396 size_t size = sizeof (MonoArray) + mono_array_element_size (klass) * mono_array_length (array);
400 size += sizeof (MonoArrayBounds) * klass->rank;
404 /* from a created object: the class must be inited already */
405 return klass->instance_size;
410 * ######################################################################
411 * ######## Global data.
412 * ######################################################################
414 static pthread_mutex_t gc_mutex = PTHREAD_MUTEX_INITIALIZER;
415 static int gc_disabled = 0;
416 static int num_minor_gcs = 0;
417 static int num_major_gcs = 0;
419 /* good sizes are 512KB-1MB: larger ones increase a lot memzeroing time */
420 //#define DEFAULT_NURSERY_SIZE (1024*512*125+4096*118)
421 #define DEFAULT_NURSERY_SIZE (1024*512*2)
422 #define DEFAULT_MAX_SECTION (DEFAULT_NURSERY_SIZE * 16)
423 #define DEFAULT_LOS_COLLECTION_TARGET (DEFAULT_NURSERY_SIZE * 2)
424 /* to quickly find the start of an object pinned by a conservative address
425 * we keep track of the objects allocated for each SCAN_START_SIZE memory
426 * chunk in the nursery or other memory sections. Larger values have less
427 * memory overhead and bigger runtime cost. 4-8 KB are reasonable values.
429 #define SCAN_START_SIZE (4096*2)
430 /* the minimum size of a fragment that we consider useful for allocation */
431 #define FRAGMENT_MIN_SIZE (512)
432 /* This is a fixed value used for pinned chunks, not the system pagesize */
433 #define FREELIST_PAGESIZE 4096
435 static mword pagesize = 4096; /* FIXME */
436 static mword nursery_size = DEFAULT_NURSERY_SIZE;
437 static mword next_section_size = DEFAULT_NURSERY_SIZE * 4;
438 static mword max_section_size = DEFAULT_MAX_SECTION;
439 static int section_size_used = 0;
440 static int degraded_mode = 0;
442 static LOSObject *los_object_list = NULL;
443 static mword los_memory_usage = 0;
444 static mword los_num_objects = 0;
445 static mword next_los_collection = 2*1024*1024; /* 2 MB, need to tune */
446 static mword total_alloc = 0;
447 /* use this to tune when to do a major/minor collection */
448 static mword memory_pressure = 0;
450 static GCMemSection *section_list = NULL;
451 static GCMemSection *nursery_section = NULL;
452 static mword lowest_heap_address = ~(mword)0;
453 static mword highest_heap_address = 0;
455 typedef struct _FinalizeEntry FinalizeEntry;
456 struct _FinalizeEntry {
459 void *data; /* can be a disappearing link or the data for the finalizer */
460 /* Note we could use just one pointer if we don't support multiple callbacks
461 * for finalizers and per-finalizer data and if we store the obj pointers
462 * in the link like libgc does
467 * The finalizable hash has the object as the key, the
468 * disappearing_link hash, has the link address as key.
470 static FinalizeEntry **finalizable_hash = NULL;
471 /* objects that are ready to be finalized */
472 static FinalizeEntry *fin_ready_list = NULL;
473 /* disappearing links use the same structure but a different list */
474 static FinalizeEntry **disappearing_link_hash = NULL;
475 static mword disappearing_link_hash_size = 0;
476 static mword finalizable_hash_size = 0;
478 static mword num_registered_finalizers = 0;
479 static mword num_ready_finalizers = 0;
480 static mword num_disappearing_links = 0;
481 static int no_finalize = 0;
483 /* keep each size a multiple of ALLOC_ALIGN */
484 /* on 64 bit systems 8 is likely completely unused. */
/* Allocation size classes served by the pinned-chunk freelists, in ascending
 * order; requests are rounded up to the nearest class. */
485 static const int freelist_sizes [] = {
486 8, 16, 24, 32, 40, 48, 64, 80,
487 96, 128, 160, 192, 224, 256, 320, 384,
488 448, 512, 584, 680, 816, 1024, 1360, 2048};
/* number of distinct size classes above */
489 #define FREELIST_NUM_SLOTS (sizeof (freelist_sizes) / sizeof (freelist_sizes [0]))
491 static char* max_pinned_chunk_addr = NULL;
492 static char* min_pinned_chunk_addr = (char*)-1;
493 /* pinned_chunk_list is used for allocations of objects that are never moved */
494 static PinnedChunk *pinned_chunk_list = NULL;
495 /* internal_chunk_list is used for allocating structures needed by the GC */
496 static PinnedChunk *internal_chunk_list = NULL;
499 obj_is_from_pinned_alloc (char *p)
501 PinnedChunk *chunk = pinned_chunk_list;
502 for (; chunk; chunk = chunk->next) {
503 if (p >= (char*)chunk->start_data && p < ((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))
509 /* registered roots: the key to the hash is the root start address */
510 static RootRecord **roots_hash = NULL;
511 static int roots_hash_size = 0;
512 static mword roots_size = 0; /* amount of memory in the root set */
513 static mword num_roots_entries = 0;
516 * The current allocation cursors
517 * We allocate objects in the nursery.
518 * The nursery is the area between nursery_start and nursery_real_end.
519 * nursery_next is the pointer to the space where the next object will be allocated.
520 * nursery_temp_end is the pointer to the end of the temporary space reserved for
521 * the allocation: this allows us to allow allocations inside the fragments of the
522 * nursery (the empty holes between pinned objects) and it allows us to set the
523 * scan starts at reasonable intervals.
524 * nursery_next and nursery_temp_end will become per-thread vars to allow lock-free
526 * nursery_first_pinned_start points to the start of the first pinned object in the nursery
527 * nursery_last_pinned_end points to the end of the last pinned object in the nursery
528 * At the next allocation, the area of the nursery where objects can be present is
529 * between MIN(nursery_first_pinned_start, first_fragment_start) and
530 * MAX(nursery_last_pinned_end, nursery_temp_end)
532 static char *nursery_start = NULL;
533 static char *nursery_next = NULL;
534 static char *nursery_temp_end = NULL;
535 static char *nursery_real_end = NULL;
536 static char *nursery_frag_real_end = NULL;
537 static char *nursery_first_pinned_start = NULL;
538 static char *nursery_last_pinned_end = NULL;
540 /* fragments that are free and ready to be used for allocation */
541 static Fragment *nursery_fragments = NULL;
542 /* freelist of fragment structures */
543 static Fragment *fragment_freelist = NULL;
546 * used when moving the objects
547 * When the nursery is collected, objects are copied to to_space.
548 * The area between to_space and gray_objects is used as a stack
549 * of objects that need their fields checked for more references
551 * We should optimize somehow this mechanism to avoid rescanning
552 * ptr-free objects. The order is also probably not optimal: need to
553 * test cache misses and other graph traversal orders.
555 static char *to_space = NULL;
556 static char *gray_objects = NULL;
557 static char *to_space_end = NULL;
558 static GCMemSection *to_space_section = NULL;
560 /* objects bigger than this go into the large object space */
561 #define MAX_SMALL_OBJ_SIZE 0xffff
564 * ######################################################################
565 * ######## Macros and function declarations.
566 * ######################################################################
570 * Recursion is not allowed for the thread lock.
/* Acquire/release the global GC lock. gc_mutex is a plain (non-recursive)
 * pthread mutex, so LOCK_GC must never be nested on the same thread. */
572 #define LOCK_GC pthread_mutex_lock (&gc_mutex)
573 #define UNLOCK_GC pthread_mutex_unlock (&gc_mutex)
575 #define UPDATE_HEAP_BOUNDARIES(low,high) do { \
576 if ((mword)(low) < lowest_heap_address) \
577 lowest_heap_address = (mword)(low); \
578 if ((mword)(high) > highest_heap_address) \
579 highest_heap_address = (mword)(high); \
583 align_pointer (void *ptr)
585 mword p = (mword)ptr;
586 p += sizeof (gpointer) - 1;
587 p &= ~ (sizeof (gpointer) - 1);
591 /* forward declarations */
592 static void* get_internal_mem (size_t size);
593 static void free_internal_mem (void *addr);
594 static void* get_os_memory (size_t size, int activate);
595 static void free_os_memory (void *addr, size_t size);
596 static void report_internal_mem_usage (void);
598 static int stop_world (void);
599 static int restart_world (void);
600 static void pin_thread_data (void *start_nursery, void *end_nursery);
601 static void scan_from_remsets (void *start_nursery, void *end_nursery);
602 static void find_pinning_ref_from_thread (char *obj, size_t size);
603 static void update_current_thread_stack (void *start);
604 static GCMemSection* alloc_section (size_t size);
605 static void finalize_in_range (void **start, void **end);
606 static void null_link_in_range (void **start, void **end);
607 static gboolean search_fragment_for_size (size_t size);
608 static void mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end);
609 static void clear_remsets (void);
610 static void sweep_pinned_objects (void);
611 static void free_large_object (LOSObject *obj);
612 static void free_mem_section (GCMemSection *section);
615 * ######################################################################
616 * ######## GC descriptors
617 * ######################################################################
618 * Used to quickly get the info the GC needs about an object: size and
619 * where the references are held.
621 /* objects are aligned to 8 bytes boundaries
622 * A descriptor is a pointer in MonoVTable, so 32 or 64 bits of size.
623 * The low 3 bits define the type of the descriptor. The other bits
624 * depend on the type.
625 * As a general rule the 13 remaining low bits define the size, either
626 * of the whole object or of the elements in the arrays. While for objects
627 * the size is already in bytes, for arrays we need to shift, because
628 * array elements might be smaller than 8 bytes. In case of arrays, we
629 * use two bits to describe what the additional high bits represents,
630 * so the default behaviour can handle element sizes less than 2048 bytes.
631 * If the high 16 bits are 0, the object is pointer-free.
632 * This design should make it easy and fast to skip over ptr-free data.
633 * The first 4 types should cover >95% of the objects.
634 * Note that since the size of objects is limited to 64K, larger objects
635 * will be allocated in the large object heap.
636 * If we want 4-bytes alignment, we need to put vector and small bitmap
640 DESC_TYPE_RUN_LENGTH, /* 16 bits aligned byte size | 1-3 (offset, numptr) bytes tuples */
641 DESC_TYPE_SMALL_BITMAP, /* 16 bits aligned byte size | 16-48 bit bitmap */
642 DESC_TYPE_STRING, /* nothing */
643 DESC_TYPE_COMPLEX, /* index for bitmap into complex_descriptors */
644 DESC_TYPE_VECTOR, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
645 DESC_TYPE_ARRAY, /* 10 bits element size | 1 bit array | 2 bits desc | element desc */
646 DESC_TYPE_LARGE_BITMAP, /* | 29-61 bitmap bits */
647 DESC_TYPE_COMPLEX_ARR, /* index for bitmap into complex_descriptors */
648 /* subtypes for arrays and vectors */
649 DESC_TYPE_V_PTRFREE = 0,/* there are no refs: keep first so it has a zero value */
650 DESC_TYPE_V_REFS, /* all the array elements are refs */
651 DESC_TYPE_V_RUN_LEN, /* elements are run-length encoded as DESC_TYPE_RUN_LENGTH */
652 DESC_TYPE_V_BITMAP /* elements are as the bitmap in DESC_TYPE_SMALL_BITMAP */
655 #define OBJECT_HEADER_WORDS (sizeof(MonoObject)/sizeof(gpointer))
656 #define LOW_TYPE_BITS 3
657 #define SMALL_BITMAP_SHIFT 16
658 #define SMALL_BITMAP_SIZE (GC_BITS_PER_WORD - SMALL_BITMAP_SHIFT)
659 #define VECTOR_INFO_SHIFT 14
660 #define VECTOR_ELSIZE_SHIFT 3
661 #define LARGE_BITMAP_SIZE (GC_BITS_PER_WORD - LOW_TYPE_BITS)
662 #define MAX_SMALL_SIZE ((1 << SMALL_BITMAP_SHIFT) - 1)
663 #define SMALL_SIZE_MASK 0xfff8
664 #define MAX_ELEMENT_SIZE 0x3ff
665 #define ELEMENT_SIZE_MASK (0x3ff << LOW_TYPE_BITS)
666 #define VECTOR_SUBTYPE_PTRFREE (DESC_TYPE_V_PTRFREE << VECTOR_INFO_SHIFT)
667 #define VECTOR_SUBTYPE_REFS (DESC_TYPE_V_REFS << VECTOR_INFO_SHIFT)
668 #define VECTOR_SUBTYPE_RUN_LEN (DESC_TYPE_V_RUN_LEN << VECTOR_INFO_SHIFT)
669 #define VECTOR_SUBTYPE_BITMAP (DESC_TYPE_V_BITMAP << VECTOR_INFO_SHIFT)
671 #define ALLOC_ALIGN 8
674 /* Root bitmap descriptors are simpler: the lower two bits describe the type
675 * and we either have 30/62 bitmap bits or nibble-based run-length,
676 * or a complex descriptor
679 ROOT_DESC_CONSERVATIVE, /* 0, so matches NULL value */
682 ROOT_DESC_LARGE_BITMAP,
683 ROOT_DESC_TYPE_MASK = 0x3,
684 ROOT_DESC_TYPE_SHIFT = 2,
687 static gsize* complex_descriptors = NULL;
688 static int complex_descriptors_size = 0;
689 static int complex_descriptors_next = 0;
692 alloc_complex_descriptor (gsize *bitmap, int numbits)
694 int nwords = numbits/GC_BITS_PER_WORD + 2;
699 res = complex_descriptors_next;
700 /* linear search, so we don't have duplicates with domain load/unload
701 * this should not be performance critical or we'd have bigger issues
702 * (the number and size of complex descriptors should be small).
704 for (i = 0; i < complex_descriptors_next; ) {
705 if (complex_descriptors [i] == nwords) {
707 for (j = 0; j < nwords - 1; ++j) {
708 if (complex_descriptors [i + 1 + j] != bitmap [j]) {
718 i += complex_descriptors [i];
720 if (complex_descriptors_next + nwords > complex_descriptors_size) {
721 int new_size = complex_descriptors_size * 2 + nwords;
722 complex_descriptors = g_realloc (complex_descriptors, new_size * sizeof (gsize));
723 complex_descriptors_size = new_size;
725 DEBUG (6, fprintf (gc_debug_file, "Complex descriptor %d, size: %d (total desc memory: %d)\n", res, nwords, complex_descriptors_size));
726 complex_descriptors_next += nwords;
727 complex_descriptors [res] = nwords;
728 for (i = 0; i < nwords - 1; ++i) {
729 complex_descriptors [res + 1 + i] = bitmap [i];
730 DEBUG (6, fprintf (gc_debug_file, "\tvalue: %p\n", (void*)complex_descriptors [res + 1 + i]));
737 * Descriptor builders.
740 mono_gc_make_descr_for_string (void)
742 return (void*) DESC_TYPE_STRING;
746 mono_gc_make_descr_for_object (gsize *bitmap, int numbits, size_t obj_size)
748 int first_set = -1, num_set = 0, last_set = -1, i;
750 size_t stored_size = obj_size;
751 stored_size += ALLOC_ALIGN - 1;
752 stored_size &= ~(ALLOC_ALIGN - 1);
753 for (i = 0; i < numbits; ++i) {
754 if (bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
761 if (stored_size <= MAX_SMALL_OBJ_SIZE) {
762 /* check run-length encoding first: one byte offset, one byte number of pointers
763 * on 64 bit archs, we can have 3 runs, just one on 32.
764 * It may be better to use nibbles.
767 desc = DESC_TYPE_RUN_LENGTH | stored_size;
768 DEBUG (6, fprintf (gc_debug_file, "Ptrfree descriptor %p, size: %d\n", (void*)desc, stored_size));
770 } else if (first_set < 256 && num_set < 256 && (first_set + num_set == last_set + 1)) {
771 desc = DESC_TYPE_RUN_LENGTH | stored_size | (first_set << 16) | (num_set << 24);
772 DEBUG (6, fprintf (gc_debug_file, "Runlen descriptor %p, size: %d, first set: %d, num set: %d\n", (void*)desc, stored_size, first_set, num_set));
775 /* we know the 2-word header is ptr-free */
776 if (last_set < SMALL_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
777 desc = DESC_TYPE_SMALL_BITMAP | stored_size | ((*bitmap >> OBJECT_HEADER_WORDS) << SMALL_BITMAP_SHIFT);
778 DEBUG (6, fprintf (gc_debug_file, "Smallbitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set));
782 /* we know the 2-word header is ptr-free */
783 if (last_set < LARGE_BITMAP_SIZE + OBJECT_HEADER_WORDS) {
784 desc = DESC_TYPE_LARGE_BITMAP | ((*bitmap >> OBJECT_HEADER_WORDS) << LOW_TYPE_BITS);
785 DEBUG (6, fprintf (gc_debug_file, "Largebitmap descriptor %p, size: %d, last set: %d\n", (void*)desc, stored_size, last_set));
788 /* it's a complex object ... */
789 desc = DESC_TYPE_COMPLEX | (alloc_complex_descriptor (bitmap, last_set + 1) << LOW_TYPE_BITS);
793 /* If the array holds references, numbits == 1 and the first bit is set in elem_bitmap */
/*
 * Builds a GC descriptor word for an array (or vector) element layout from the
 * per-element reference bitmap.
 * NOTE(review): this listing elides several lines (the loop body that updates
 * first_set/num_set/last_set, and the final return) — comments below describe
 * only the visible branches.
 */
795 mono_gc_make_descr_for_array (int vector, gsize *elem_bitmap, int numbits, size_t elem_size)
797 int first_set = -1, num_set = 0, last_set = -1, i;
798 mword desc = vector? DESC_TYPE_VECTOR: DESC_TYPE_ARRAY;
/* scan elem_bitmap to locate the set (reference) bits */
799 for (i = 0; i < numbits; ++i) {
800 if (elem_bitmap [i / GC_BITS_PER_WORD] & (1 << (i % GC_BITS_PER_WORD))) {
/* small element sizes are encoded directly into the descriptor word */
807 if (elem_size <= MAX_ELEMENT_SIZE) {
808 desc |= elem_size << VECTOR_ELSIZE_SHIFT;
810 return (void*)(desc | VECTOR_SUBTYPE_PTRFREE);
812 /* Note: we also handle structs with just ref fields */
813 if (num_set * sizeof (gpointer) == elem_size) {
814 return (void*)(desc | VECTOR_SUBTYPE_REFS | ((-1LL) << 16));
816 /* FIXME: try run-len first */
817 /* Note: we can't skip the object header here, because it's not present */
818 if (last_set <= SMALL_BITMAP_SIZE) {
819 return (void*)(desc | VECTOR_SUBTYPE_BITMAP | (*elem_bitmap << 16));
/* fallback: element layout too complex to inline — store an out-of-line
 * complex descriptor index instead */
822 /* it's an array of complex structs ... */
823 desc = DESC_TYPE_COMPLEX_ARR;
824 desc |= alloc_complex_descriptor (elem_bitmap, last_set + 1) << LOW_TYPE_BITS;
828 /* helper macros to scan and traverse objects, macros because we reuse them in many functions */
829 #define STRING_SIZE(size,str) do { \
830 (size) = sizeof (MonoString) + 2 * (mono_string_length ((MonoString*)(str)) + 1); \
831 (size) += (ALLOC_ALIGN - 1); \
832 (size) &= ~(ALLOC_ALIGN - 1); \
835 #define OBJ_RUN_LEN_SIZE(size,vt,obj) do { \
836 (size) = (vt)->desc & 0xfff8; \
839 #define OBJ_BITMAP_SIZE(size,vt,obj) do { \
840 (size) = (vt)->desc & 0xfff8; \
843 //#define PREFETCH(addr) __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(addr)))
844 #define PREFETCH(addr)
846 /* code using these macros must define a HANDLE_PTR(ptr) macro that does the work */
847 #define OBJ_RUN_LEN_FOREACH_PTR(vt,obj) do { \
848 if ((vt)->desc & 0xffff0000) { \
849 /* there are pointers */ \
850 void **_objptr_end; \
851 void **_objptr = (void**)(obj); \
852 _objptr += ((vt)->desc >> 16) & 0xff; \
853 _objptr_end = _objptr + (((vt)->desc >> 24) & 0xff); \
854 while (_objptr < _objptr_end) { \
855 HANDLE_PTR (_objptr, (obj)); \
861 /* a bitmap desc means that there are pointer references or we'd have
862 * chosen run-length, instead: add an assert to check.
864 #define OBJ_BITMAP_FOREACH_PTR(vt,obj) do { \
865 /* there are pointers */ \
866 void **_objptr = (void**)(obj); \
867 gsize _bmap = (vt)->desc >> 16; \
868 _objptr += OBJECT_HEADER_WORDS; \
871 HANDLE_PTR (_objptr, (obj)); \
878 #define OBJ_LARGE_BITMAP_FOREACH_PTR(vt,obj) do { \
879 /* there are pointers */ \
880 void **_objptr = (void**)(obj); \
881 gsize _bmap = (vt)->desc >> LOW_TYPE_BITS; \
882 _objptr += OBJECT_HEADER_WORDS; \
885 HANDLE_PTR (_objptr, (obj)); \
892 #define OBJ_COMPLEX_FOREACH_PTR(vt,obj) do { \
893 /* there are pointers */ \
894 void **_objptr = (void**)(obj); \
895 gsize *bitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
896 int bwords = (*bitmap_data) - 1; \
897 void **start_run = _objptr; \
900 MonoObject *myobj = (MonoObject*)obj; \
901 g_print ("found %d at %p (0x%x): %s.%s\n", bwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
903 while (bwords-- > 0) { \
904 gsize _bmap = *bitmap_data++; \
905 _objptr = start_run; \
906 /*g_print ("bitmap: 0x%x/%d at %p\n", _bmap, bwords, _objptr);*/ \
909 HANDLE_PTR (_objptr, (obj)); \
914 start_run += GC_BITS_PER_WORD; \
918 /* this one is untested */
919 #define OBJ_COMPLEX_ARR_FOREACH_PTR(vt,obj) do { \
920 /* there are pointers */ \
921 gsize *mbitmap_data = complex_descriptors + ((vt)->desc >> LOW_TYPE_BITS); \
922 int mbwords = (*mbitmap_data++) - 1; \
923 int el_size = mono_array_element_size (((MonoObject*)(obj))->vtable->klass); \
924 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
925 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
927 MonoObject *myobj = (MonoObject*)start; \
928 g_print ("found %d at %p (0x%x): %s.%s\n", mbwords, (obj), (vt)->desc, myobj->vtable->klass->name_space, myobj->vtable->klass->name); \
930 while (e_start < e_end) { \
931 void **_objptr = (void**)e_start; \
932 gsize *bitmap_data = mbitmap_data; \
933 unsigned int bwords = mbwords; \
934 while (bwords-- > 0) { \
935 gsize _bmap = *bitmap_data++; \
936 void **start_run = _objptr; \
937 /*g_print ("bitmap: 0x%x\n", _bmap);*/ \
940 HANDLE_PTR (_objptr, (obj)); \
945 _objptr = start_run + GC_BITS_PER_WORD; \
947 e_start += el_size; \
951 #define OBJ_VECTOR_FOREACH_PTR(vt,obj) do { \
952 /* note: 0xffffc000 excludes DESC_TYPE_V_PTRFREE */ \
953 if ((vt)->desc & 0xffffc000) { \
954 int el_size = ((vt)->desc >> 3) & MAX_ELEMENT_SIZE; \
955 /* there are pointers */ \
956 int etype = (vt)->desc & 0xc000; \
957 if (etype == (DESC_TYPE_V_REFS << 14)) { \
958 void **p = (void**)((char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector)); \
959 void **end_refs = (void**)((char*)p + el_size * mono_array_length ((MonoArray*)(obj))); \
960 /* Note: this code can handle also arrays of struct with only references in them */ \
961 while (p < end_refs) { \
962 HANDLE_PTR (p, (obj)); \
965 } else if (etype == DESC_TYPE_V_RUN_LEN << 14) { \
966 int offset = ((vt)->desc >> 16) & 0xff; \
967 int num_refs = ((vt)->desc >> 24) & 0xff; \
968 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
969 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
970 while (e_start < e_end) { \
971 void **p = (void**)e_start; \
974 for (i = 0; i < num_refs; ++i) { \
975 HANDLE_PTR (p + i, (obj)); \
977 e_start += el_size; \
979 } else if (etype == DESC_TYPE_V_BITMAP << 14) { \
980 char *e_start = (char*)(obj) + G_STRUCT_OFFSET (MonoArray, vector); \
981 char *e_end = e_start + el_size * mono_array_length ((MonoArray*)(obj)); \
982 while (e_start < e_end) { \
983 void **p = (void**)e_start; \
984 gsize _bmap = (vt)->desc >> 16; \
985 /* Note: there is no object header here to skip */ \
988 HANDLE_PTR (p, (obj)); \
993 e_start += el_size; \
999 static mword new_obj_references = 0;
1000 static mword obj_references_checked = 0;
1003 #define HANDLE_PTR(ptr,obj) do { \
1004 if (*(ptr) && (char*)*(ptr) >= nursery_start && (char*)*(ptr) < nursery_next) { \
1005 new_obj_references++; \
1006 /*printf ("bogus ptr %p found at %p in object %p (%s.%s)\n", *(ptr), (ptr), o, o->vtable->klass->name_space, o->vtable->klass->name);*/ \
1008 obj_references_checked++; \
1013 * ######################################################################
1014 * ######## Detecting and removing garbage.
1015 * ######################################################################
1016 * This section of code deals with detecting the objects no longer in use
1017 * and reclaiming the memory.
/*
 * Debug/verification pass: walks every object in [start, end) and, through the
 * per-layout FOREACH_PTR macros (with the counting HANDLE_PTR defined above),
 * tallies references that still point into the nursery.
 * NOTE(review): this listing elides lines (local declarations, per-type counter
 * increments, loop advancement) — comments are hedged accordingly.
 */
1019 static void __attribute__((noinline))
1020 scan_area (char *start, char *end)
1025 int type_str = 0, type_rlen = 0, type_bitmap = 0, type_vector = 0, type_lbit = 0, type_complex = 0;
1026 new_obj_references = 0;
1027 obj_references_checked = 0;
1028 while (start < end) {
/* a zeroed first word marks a hole (unused/zeroed memory), not an object */
1029 if (!*(void**)start) {
1030 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1033 vt = (GCVTable*)LOAD_VTABLE (start);
1034 DEBUG (8, fprintf (gc_debug_file, "Scanning object %p, vtable: %p (%s)\n", start, vt, vt->klass->name));
1036 MonoObject *obj = (MonoObject*)start;
1037 g_print ("found at %p (0x%x): %s.%s\n", start, vt->desc, obj->vtable->klass->name_space, obj->vtable->klass->name);
/* the low 3 bits of the GC descriptor select the object layout kind */
1039 type = vt->desc & 0x7;
1040 if (type == DESC_TYPE_STRING) {
1041 STRING_SIZE (skip_size, start);
1045 } else if (type == DESC_TYPE_RUN_LENGTH) {
1046 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1047 g_assert (skip_size);
1048 OBJ_RUN_LEN_FOREACH_PTR (vt,start);
1052 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
/* array size = element size (from descriptor) * length + header, aligned */
1053 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1054 skip_size *= mono_array_length ((MonoArray*)start);
1055 skip_size += sizeof (MonoArray);
1056 skip_size += (ALLOC_ALIGN - 1);
1057 skip_size &= ~(ALLOC_ALIGN - 1);
1058 OBJ_VECTOR_FOREACH_PTR (vt, start);
1059 if (type == DESC_TYPE_ARRAY) {
1060 /* account for the bounds */
1065 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1066 OBJ_BITMAP_SIZE (skip_size, vt, start);
1067 g_assert (skip_size);
1068 OBJ_BITMAP_FOREACH_PTR (vt,start);
1072 } else if (type == DESC_TYPE_LARGE_BITMAP) {
/* size is not encoded in the descriptor for these kinds; query the object */
1073 skip_size = safe_object_get_size ((MonoObject*)start);
1074 skip_size += (ALLOC_ALIGN - 1);
1075 skip_size &= ~(ALLOC_ALIGN - 1);
1076 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1080 } else if (type == DESC_TYPE_COMPLEX) {
1081 /* this is a complex object */
1082 skip_size = safe_object_get_size ((MonoObject*)start);
1083 skip_size += (ALLOC_ALIGN - 1);
1084 skip_size &= ~(ALLOC_ALIGN - 1);
1085 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1089 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1090 /* this is an array of complex structs */
1091 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1092 skip_size *= mono_array_length ((MonoArray*)start);
1093 skip_size += sizeof (MonoArray);
1094 skip_size += (ALLOC_ALIGN - 1);
1095 skip_size &= ~(ALLOC_ALIGN - 1);
1096 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1097 if (type == DESC_TYPE_ARRAY) {
1098 /* account for the bounds */
1107 /*printf ("references to new nursery %p-%p (size: %dk): %d, checked: %d\n", old_start, end, (end-old_start)/1024, new_obj_references, obj_references_checked);
1108 printf ("\tstrings: %d, runl: %d, vector: %d, bitmaps: %d, lbitmaps: %d, complex: %d\n",
1109 type_str, type_rlen, type_vector, type_bitmap, type_lbit, type_complex);*/
/*
 * Walks every object in [start, end) and zeroes (via memset) the ones that
 * belong to @domain, so their memory reads as holes afterwards; used when an
 * appdomain is unloaded.  Thread objects are deliberately spared.
 * NOTE(review): elided lines include the `remove` flag's declaration/reset and
 * the loop advancement — comments are hedged accordingly.
 */
1112 static void __attribute__((noinline))
1113 scan_area_for_domain (MonoDomain *domain, char *start, char *end)
1118 while (start < end) {
/* a zeroed first word marks a hole, not an object */
1119 if (!*(void**)start) {
1120 start += sizeof (void*); /* should be ALLOC_ALIGN, really */
1123 vt = (GCVTable*)LOAD_VTABLE (start);
1124 /* handle threads someway (maybe insert the root domain vtable?) */
1125 if (mono_object_domain (start) == domain && vt->klass != mono_defaults.thread_class) {
1126 DEBUG (1, fprintf (gc_debug_file, "Need to cleanup object %p, (%s)\n", start, safe_name (start)));
/* per-layout size computation mirrors scan_area(); size must be computed
 * BEFORE the memset since it reads the object's vtable/length */
1131 type = vt->desc & 0x7;
1132 if (type == DESC_TYPE_STRING) {
1133 STRING_SIZE (skip_size, start);
1134 if (remove) memset (start, 0, skip_size);
1137 } else if (type == DESC_TYPE_RUN_LENGTH) {
1138 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1139 g_assert (skip_size);
1140 if (remove) memset (start, 0, skip_size);
1143 } else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1144 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1145 skip_size *= mono_array_length ((MonoArray*)start);
1146 skip_size += sizeof (MonoArray);
1147 skip_size += (ALLOC_ALIGN - 1);
1148 skip_size &= ~(ALLOC_ALIGN - 1);
1149 if (type == DESC_TYPE_ARRAY) {
1150 /* account for the bounds */
1152 if (remove) memset (start, 0, skip_size);
1155 } else if (type == DESC_TYPE_SMALL_BITMAP) {
1156 OBJ_BITMAP_SIZE (skip_size, vt, start);
1157 g_assert (skip_size);
1158 if (remove) memset (start, 0, skip_size);
1161 } else if (type == DESC_TYPE_LARGE_BITMAP) {
1162 skip_size = safe_object_get_size ((MonoObject*)start);
1163 skip_size += (ALLOC_ALIGN - 1);
1164 skip_size &= ~(ALLOC_ALIGN - 1);
1165 if (remove) memset (start, 0, skip_size);
1168 } else if (type == DESC_TYPE_COMPLEX) {
1169 /* this is a complex object */
1170 skip_size = safe_object_get_size ((MonoObject*)start);
1171 skip_size += (ALLOC_ALIGN - 1);
1172 skip_size &= ~(ALLOC_ALIGN - 1);
1173 if (remove) memset (start, 0, skip_size);
1176 } else if (type == DESC_TYPE_COMPLEX_ARR) {
1177 /* this is an array of complex structs */
1178 skip_size = mono_array_element_size (((MonoVTable*)vt)->klass);
1179 skip_size *= mono_array_length ((MonoArray*)start);
1180 skip_size += sizeof (MonoArray);
1181 skip_size += (ALLOC_ALIGN - 1);
1182 skip_size &= ~(ALLOC_ALIGN - 1);
1183 if (type == DESC_TYPE_ARRAY) {
1184 /* account for the bounds */
1186 if (remove) memset (start, 0, skip_size);
1196 * When appdomains are unloaded we can easily remove objects that have finalizers,
1197 * but all the others could still be present in random places on the heap.
1198 * We need a sweep to get rid of them even though it's going to be costly
1200 * The reason we need to remove them is because we access the vtable and class
1201 * structures to know the object size and the reference bitmap: once the domain is
1202 * unloaded they point to random memory.
/*
 * Public entry point for appdomain unload: clears objects belonging to
 * @domain in every heap section (see scan_area_for_domain above).
 * NOTE(review): the return-type line and section-lock handling (if any) are
 * elided in this listing.
 */
1205 mono_gc_clear_domain (MonoDomain * domain)
1207 GCMemSection *section;
1209 for (section = section_list; section; section = section->next) {
1210 scan_area_for_domain (domain, section->data, section->end_data);
1212 /* FIXME: handle big and fixed objects (we remove, don't clear in this case) */
/*
 * Records @ptr (the address of a slot holding an old-to-new reference) in the
 * global remembered set; grows the set by allocating a new RememberedSet and
 * chaining the old one behind it when the current buffer is full.
 * NOTE(review): the line that installs `rs` as the new global_remset is elided
 * here — presumably between original lines 1226 and 1228; verify in full source.
 */
1217 add_to_global_remset (gpointer ptr)
1220 DEBUG (8, fprintf (gc_debug_file, "Adding global remset for %p\n", ptr));
/* fast path: room left in the current remset buffer */
1221 if (global_remset->store_next < global_remset->end_set) {
1222 *(global_remset->store_next++) = (mword)ptr;
/* slow path: allocate a same-sized buffer and chain the full one behind it */
1225 rs = alloc_remset (global_remset->end_set - global_remset->data, NULL);
1226 rs->next = global_remset;
1228 *(global_remset->store_next++) = (mword)ptr;
1232 * This is how the copying happens from the nursery to the old generation.
1233 * We assume that at this time all the pinned objects have been identified and
1235 * We run scan_object() for each pinned object so that each referenced
1236 * objects if possible are copied. The new gray objects created can have
1237 * scan_object() run on them right away, too.
1238 * Then we run copy_object() for the precisely tracked roots. At this point
1239 * all the roots are either gray or black. We run scan_object() on the gray
1240 * objects until no more gray objects are created.
1241 * At the end of the process we walk again the pinned list and we unmark
1242 * the pinned flag. As we go we also create the list of free space for use
1243 * in the next allocation runs.
1245 * We need to remember objects from the old generation that point to the new one
1246 * (or just addresses?).
1248 * copy_object could be made into a macro once debugged (use inline for now).
/*
 * Core of the copying collector: if @obj lives in the from-space
 * [from_space_start, from_space_end), copy it to the gray area and return the
 * new address; otherwise (forwarded, pinned, LOS/pinned-chunk) return an
 * address without moving.  Callers overwrite their slot with the result.
 * NOTE(review): elided lines include local declarations, the early returns'
 * values, and parts of the inline-asm copy loop — comments are hedged.
 */
1251 static char* __attribute__((noinline))
1252 copy_object (char *obj, char *from_space_start, char *from_space_end)
1254 if (obj >= from_space_start && obj < from_space_end) {
1258 DEBUG (9, fprintf (gc_debug_file, "Precise copy of %p", obj));
/* already copied in this collection: the forwarding pointer is the answer */
1259 if ((forwarded = object_is_forwarded (obj))) {
1260 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1261 DEBUG (9, fprintf (gc_debug_file, " (already forwarded to %p)\n", forwarded));
/* pinned objects must not move */
1264 if (object_is_pinned (obj)) {
1265 g_assert (((MonoVTable*)LOAD_VTABLE(obj))->gc_descr);
1266 DEBUG (9, fprintf (gc_debug_file, " (pinned, no change)\n"));
1269 objsize = safe_object_get_size ((MonoObject*)obj);
1270 objsize += ALLOC_ALIGN - 1;
1271 objsize &= ~(ALLOC_ALIGN - 1);
1272 DEBUG (9, fprintf (gc_debug_file, " (to %p, %s size: %d)\n", gray_objects, ((MonoObject*)obj)->vtable->klass->name, objsize));
1273 /* FIXME: handle pinned allocs:
1274 * Large objects are simple, at least until we always follow the rule:
1275 * if objsize >= MAX_SMALL_OBJ_SIZE, pin the object and return it.
1276 * At the end of major collections, we walk the los list and if
1277 * the object is pinned, it is marked, otherwise it can be freed.
/* large objects and pinned-chunk objects are marked in place, never copied */
1279 if (objsize >= MAX_SMALL_OBJ_SIZE || (obj >= min_pinned_chunk_addr && obj < max_pinned_chunk_addr && obj_is_from_pinned_alloc (obj))) {
1280 DEBUG (9, fprintf (gc_debug_file, "Marked LOS/Pinned %p (%s), size: %d\n", obj, safe_name (obj), objsize));
1284 /* ok, the object is not pinned, we can move it */
1285 /* use an optimized memcpy here */
/* x86 path: rep movsl-style copy via inline asm (body partially elided) */
1290 char* edi = gray_objects;
1291 __asm__ __volatile__(
1293 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
1294 : "0" (objsize/4), "1" (edi),"2" (esi)
1299 memcpy (gray_objects, obj, objsize);
1301 /* adjust array->bounds */
1302 vt = ((MonoObject*)obj)->vtable;
1303 g_assert (vt->gc_descr);
/* bounds is an interior pointer: rebase it relative to the copy */
1304 if (vt->rank && ((MonoArray*)obj)->bounds) {
1305 MonoArray *array = (MonoArray*)gray_objects;
1306 array->bounds = (MonoArrayBounds*)((char*)gray_objects + ((char*)((MonoArray*)obj)->bounds - (char*)obj));
1307 DEBUG (9, fprintf (gc_debug_file, "Array instance %p: size: %d, rank: %d, length: %d\n", array, objsize, vt->rank, mono_array_length (array)));
1309 /* set the forwarding pointer */
1310 forward_object (obj, gray_objects);
1312 to_space_section->scan_starts [((char*)obj - (char*)to_space_section->data)/SCAN_START_SIZE] = obj;
/* bump the gray-area allocation pointer past the copy */
1313 gray_objects += objsize;
1314 DEBUG (8, g_assert (gray_objects <= to_space_end));
1321 #define HANDLE_PTR(ptr,obj) do { \
1323 void *__old = *(ptr); \
1324 *(ptr) = copy_object (*(ptr), from_start, from_end); \
1325 DEBUG (9, if (__old != *(ptr)) fprintf (gc_debug_file, "Overwrote field at %p with %p (was: %p)\n", (ptr), *(ptr), __old)); \
1326 if (*(ptr) >= (void*)from_start && *(ptr) < (void*)from_end) \
1327 add_to_global_remset ((ptr)); \
1332 * Scan the object pointed to by @start for references to
1333 * other objects between @from_start and @from_end and copy
1334 * them to the gray_objects area.
1335 * Returns a pointer to the end of the object.
/*
 * Scans one object at @start: for each reference slot (per the vtable's GC
 * descriptor) invokes the copying HANDLE_PTR defined above, then returns the
 * address just past the object (aligned), so callers can walk a region.
 * NOTE(review): the return-type line, local declarations and some conditional
 * lines (e.g. the bounds/no-bounds split in the VECTOR and COMPLEX_ARR cases)
 * are elided in this listing.
 */
1338 scan_object (char *start, char* from_start, char* from_end)
1343 vt = (GCVTable*)LOAD_VTABLE (start);
1344 //type = vt->desc & 0x7;
1346 /* gcc should be smart enough to remove the bounds check, but it isn't:( */
1347 switch (vt->desc & 0x7) {
1348 //if (type == DESC_TYPE_STRING) {
1349 case DESC_TYPE_STRING:
/* strings hold no references: just compute the size and skip */
1350 STRING_SIZE (skip_size, start);
1351 return start + skip_size;
1352 //} else if (type == DESC_TYPE_RUN_LENGTH) {
1353 case DESC_TYPE_RUN_LENGTH:
1354 OBJ_RUN_LEN_FOREACH_PTR (vt,start);
1355 OBJ_RUN_LEN_SIZE (skip_size, vt, start);
1356 g_assert (skip_size);
1357 return start + skip_size;
1358 //} else if (type == DESC_TYPE_VECTOR) { // includes ARRAY, too
1359 case DESC_TYPE_ARRAY:
1360 case DESC_TYPE_VECTOR:
1361 OBJ_VECTOR_FOREACH_PTR (vt, start);
1362 skip_size = safe_object_get_size ((MonoObject*)start);
/* alternate size path: element size encoded in the descriptor (the branch
 * selecting between the two computations is elided here) */
1364 skip_size = (vt->desc >> LOW_TYPE_BITS) & MAX_ELEMENT_SIZE;
1365 skip_size *= mono_array_length ((MonoArray*)start);
1366 skip_size += sizeof (MonoArray);
1368 skip_size += (ALLOC_ALIGN - 1);
1369 skip_size &= ~(ALLOC_ALIGN - 1);
1370 return start + skip_size;
1371 //} else if (type == DESC_TYPE_SMALL_BITMAP) {
1372 case DESC_TYPE_SMALL_BITMAP:
1373 OBJ_BITMAP_FOREACH_PTR (vt,start);
1374 OBJ_BITMAP_SIZE (skip_size, vt, start);
1375 return start + skip_size;
1376 //} else if (type == DESC_TYPE_LARGE_BITMAP) {
1377 case DESC_TYPE_LARGE_BITMAP:
1378 OBJ_LARGE_BITMAP_FOREACH_PTR (vt,start);
1379 skip_size = safe_object_get_size ((MonoObject*)start);
1380 skip_size += (ALLOC_ALIGN - 1);
1381 skip_size &= ~(ALLOC_ALIGN - 1);
1382 return start + skip_size;
1383 //} else if (type == DESC_TYPE_COMPLEX) {
1384 case DESC_TYPE_COMPLEX:
1385 OBJ_COMPLEX_FOREACH_PTR (vt, start);
1386 /* this is a complex object */
1387 skip_size = safe_object_get_size ((MonoObject*)start);
1388 skip_size += (ALLOC_ALIGN - 1);
1389 skip_size &= ~(ALLOC_ALIGN - 1);
1390 return start + skip_size;
1391 //} else if (type == DESC_TYPE_COMPLEX_ARR) {
1392 case DESC_TYPE_COMPLEX_ARR:
1393 OBJ_COMPLEX_ARR_FOREACH_PTR (vt, start);
1394 /* this is an array of complex structs */
1395 skip_size = safe_object_get_size ((MonoObject*)start);
1397 skip_size = mono_array_element_size (((MonoObject*)start)->vtable->klass);
1398 skip_size *= mono_array_length ((MonoArray*)start);
1399 skip_size += sizeof (MonoArray);
1401 skip_size += (ALLOC_ALIGN - 1);
1402 skip_size &= ~(ALLOC_ALIGN - 1);
1403 return start + skip_size;
1405 g_assert_not_reached ();
1410 * Addresses from start to end are already sorted. This function finds the object header
1411 * for each address and pins the object. The addresses must be inside the passed section.
1412 * Return the number of pinned objects.
/*
 * For each (sorted) candidate address in [start, end) that falls inside the
 * nursery, locates the containing object by walking forward from the nearest
 * scan_start, pins it, and compacts the pinned-object pointers into the front
 * of the array.  Returns the pinned count (return line elided in this listing).
 * NOTE(review): declarations of addr/last/count/idx/search_start and the
 * loop-advance lines are elided — comments are hedged accordingly.
 */
1415 pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery)
1420 void *last_obj = NULL;
1421 size_t last_obj_size = 0;
1424 void **definitely_pinned = start;
1425 while (start < end) {
1427 /* the range check should be redundant */
1428 if (addr != last && addr >= start_nursery && addr < end_nursery) {
1429 DEBUG (5, fprintf (gc_debug_file, "Considering pinning addr %p\n", addr));
1430 /* multiple pointers to the same object */
1431 if (addr >= last_obj && (char*)addr < (char*)last_obj + last_obj_size) {
/* use the section's scan_starts table to find a nearby known object start */
1435 idx = ((char*)addr - (char*)section->data) / SCAN_START_SIZE;
1436 search_start = (void*)section->scan_starts [idx];
1437 if (!search_start || search_start > addr) {
1440 search_start = section->scan_starts [idx];
1441 if (search_start && search_start <= addr)
1444 if (!search_start || search_start > addr)
1445 search_start = start_nursery;
1447 if (search_start < last_obj)
1448 search_start = (char*)last_obj + last_obj_size;
1449 /* now addr should be in an object a short distance from search_start
1450 * Note that search_start must point to zeroed mem or point to an object.
/* skip over a hole: zeroed first word means no object here */
1453 if (!*(void**)search_start) {
1454 mword p = (mword)search_start;
1455 p += sizeof (gpointer);
1456 p += ALLOC_ALIGN - 1;
1457 p &= ~(ALLOC_ALIGN - 1);
1458 search_start = (void*)p;
1461 last_obj = search_start;
1462 last_obj_size = safe_object_get_size ((MonoObject*)search_start);
1463 last_obj_size += ALLOC_ALIGN - 1;
1464 last_obj_size &= ~(ALLOC_ALIGN - 1);
1465 DEBUG (8, fprintf (gc_debug_file, "Pinned try match %p (%s), size %d\n", last_obj, safe_name (last_obj), last_obj_size));
/* addr falls inside this object: pin it and record the object start */
1466 if (addr >= search_start && (char*)addr < (char*)last_obj + last_obj_size) {
1467 DEBUG (4, fprintf (gc_debug_file, "Pinned object %p, vtable %p (%s), count %d\n", search_start, *(void**)search_start, safe_name (search_start), count));
1468 pin_object (search_start);
1469 definitely_pinned [count] = search_start;
1473 /* skip to the next object */
1474 search_start = (void*)((char*)search_start + last_obj_size);
1475 } while (search_start <= addr);
1476 /* we either pinned the correct object or we ignored the addr because
1477 * it points to unused zeroed memory.
1483 //printf ("effective pinned: %d (at the end: %d)\n", count, (char*)end_nursery - (char*)last);
1487 static void** pin_queue;
1488 static int pin_queue_size = 0;
1489 static int next_pin_slot = 0;
1494 gap = (gap * 10) / 13;
1495 if (gap == 9 || gap == 10)
1504 compare_addr (const void *a, const void *b)
1506 return *(const void **)a - *(const void **)b;
1510 /* sort the addresses in array in increasing order */
/*
 * In-place ascending sort of @array; hand-rolled because qsort() measured
 * slower here.  The gap/swapped structure suggests a comb-sort variant —
 * NOTE(review): several lines (gap/swapped init, the j computation, the swap
 * completion and loop closers) are elided in this listing, so confirm against
 * the full source.
 */
1512 sort_addresses (void **array, int size)
1515 * qsort is slower as predicted.
1516 * qsort (array, size, sizeof (gpointer), compare_addr);
1523 gap = new_gap (gap);
1526 for (i = 0; i < end; i++) {
1528 if (array [i] > array [j]) {
1529 void* val = array [i];
1530 array [i] = array [j];
/* terminate once a full pass at gap==1 makes no swaps */
1535 if (gap == 1 && !swapped)
1541 print_nursery_gaps (void* start_nursery, void *end_nursery)
1544 gpointer first = start_nursery;
1546 for (i = 0; i < next_pin_slot; ++i) {
1547 next = pin_queue [i];
1548 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, (char*)next-(char*)first);
1552 fprintf (gc_debug_file, "Nursery range: %p-%p, size: %d\n", first, next, (char*)next-(char*)first);
1555 /* reduce the info in the pin queue, removing duplicate pointers and sorting them */
/*
 * Sorts pin_queue[start_slot..next_pin_slot) and removes duplicate addresses,
 * shrinking next_pin_slot to the deduplicated size.
 * NOTE(review): the compaction lines between originals 1566 and 1573 are
 * elided here.
 */
1557 optimize_pin_queue (int start_slot)
1559 void **start, **cur, **end;
1560 /* sort and uniq pin_queue: we just sort and we let the rest discard multiple values */
1561 /* it may be better to keep ranges of pinned memory instead of individually pinning objects */
1562 DEBUG (5, fprintf (gc_debug_file, "Sorting pin queue, size: %d\n", next_pin_slot));
1563 if ((next_pin_slot - start_slot) > 1)
1564 sort_addresses (pin_queue + start_slot, next_pin_slot - start_slot);
1565 start = cur = pin_queue + start_slot;
1566 end = pin_queue + next_pin_slot;
/* NOTE(review): `*cur` is dereferenced before the `cur < end` bound is
 * checked — looks like it can read one slot past the queue; verify against
 * the full source and fix ordering if confirmed */
1569 while (*start == *cur && cur < end)
1573 next_pin_slot = start - pin_queue;
1574 DEBUG (5, fprintf (gc_debug_file, "Pin queue reduced to size: %d\n", next_pin_slot));
1575 //DEBUG (6, print_nursery_gaps (start_nursery, end_nursery));
/*
 * Grows the pin queue by 50% (first allocation: 1024 slots), copying the
 * existing entries and freeing the old buffer.
 */
1580 realloc_pin_queue (void)
1582 int new_size = pin_queue_size? pin_queue_size + pin_queue_size/2: 1024;
1583 void **new_pin = get_internal_mem (sizeof (void*) * new_size);
/* only next_pin_slot entries are live; the rest of the old buffer is unused */
1584 memcpy (new_pin, pin_queue, sizeof (void*) * next_pin_slot);
1585 free_internal_mem (pin_queue);
1586 pin_queue = new_pin;
1587 pin_queue_size = new_size;
1588 DEBUG (4, fprintf (gc_debug_file, "Reallocated pin queue to size: %d\n", new_size));
1592 * Scan the memory between start and end and queue values which could be pointers
1593 * to the area between start_nursery and end_nursery for later consideration.
1594 * Typically used for thread stacks.
/*
 * Conservative root scan: every word in [start, end) that looks like a pointer
 * into [start_nursery, end_nursery) is aligned down and queued in pin_queue
 * for later sorting/deduplication (see optimize_pin_queue).
 * NOTE(review): the return-type line, count declaration and loop advancement
 * are elided in this listing.
 */
1597 conservatively_pin_objects_from (void **start, void **end, void *start_nursery, void *end_nursery)
1600 while (start < end) {
1601 if (*start >= start_nursery && *start < end_nursery) {
1603 * *start can point to the middle of an object
1604 * note: should we handle pointing at the end of an object?
1605 * pinning in C# code disallows pointing at the end of an object
1606 * but there is some small chance that an optimizing C compiler
1607 * may keep the only reference to an object by pointing
1608 * at the end of it. We ignore this small chance for now.
1609 * Pointers to the end of an object are indistinguishable
1610 * from pointers to the start of the next object in memory
1611 * so if we allow that we'd need to pin two objects...
1612 * We queue the pointer in an array, the
1613 * array will then be sorted and uniqued. This way
1614 * we can coalesce several pinning pointers and it should
1615 * be faster since we'd do a memory scan with increasing
1616 * addresses. Note: we can align the address to the allocation
1617 * alignment, so the unique process is more effective.
/* align down so duplicates collapse during the later uniq pass */
1619 mword addr = (mword)*start;
1620 addr &= ~(ALLOC_ALIGN - 1);
1621 if (next_pin_slot >= pin_queue_size)
1622 realloc_pin_queue ();
1623 pin_queue [next_pin_slot++] = (void*)addr;
1624 DEBUG (6, if (count) fprintf (gc_debug_file, "Pinning address %p\n", (void*)addr));
1629 DEBUG (7, if (count) fprintf (gc_debug_file, "found %d potential pinned heap pointers\n", count));
1633 * If generation is 0, just mark objects in the nursery, the others we don't care,
1634 * since they are not going to move anyway.
1635 * There are different areas that are scanned for pinned pointers:
1636 * *) the thread stacks (when jit support is ready only the unmanaged frames)
1637 * *) the pinned handle table
1638 * *) the pinned roots
1640 * Note: when we'll use a write barrier for old to new gen references, we need to
1641 * keep track of old gen objects that point to pinned new gen objects because in that
1642 * case the referenced object will be moved maybe at the next collection, but there
1643 * is no write in the old generation area where the pinned object is referenced
1644 * and we may not consider it as reachable.
1647 mark_pinned_objects (int generation)
1652 * Debugging function: find in the conservative roots where @obj is being pinned.
/*
 * Debug helper: searches every conservative (desc-less) root record for a
 * word pointing into [obj, obj+size) and logs the location; then delegates to
 * find_pinning_ref_from_thread for thread stacks.
 * NOTE(review): return-type line, local declarations and loop closers are
 * elided in this listing.
 */
1655 find_pinning_reference (char *obj, size_t size)
1659 char *endobj = obj + size;
1660 for (i = 0; i < roots_hash_size; ++i) {
1661 for (root = roots_hash [i]; root; root = root->next) {
1662 /* if desc is non-null it has precise info */
1663 if (!root->root_desc) {
1664 char ** start = (char**)root->start_root;
1665 while (start < (char**)root->end_root) {
1666 if (*start >= obj && *start < endobj) {
1667 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in pinned roots %p-%p (at %p in record %p)\n", obj, root->start_root, root->end_root, start, root));
1674 find_pinning_ref_from_thread (obj, size);
1678 * The first thing we do in a collection is to identify pinned objects.
1679 * This function considers all the areas of memory that need to be
1680 * conservatively scanned.
/*
 * First phase of a collection: conservatively scans all pin sources — the
 * desc-less registered roots and the thread stacks/registers — queueing
 * candidate nursery pointers via conservatively_pin_objects_from.
 * NOTE(review): return-type line and local declarations are elided here.
 */
1683 pin_from_roots (void *start_nursery, void *end_nursery)
1687 DEBUG (3, fprintf (gc_debug_file, "Scanning pinned roots (%d bytes, %d entries)\n", (int)roots_size, num_roots_entries));
1688 /* objects pinned from the API are inside these roots */
1689 for (i = 0; i < roots_hash_size; ++i) {
1690 for (root = roots_hash [i]; root; root = root->next) {
1691 /* if desc is non-null it has precise info */
1692 if (root->root_desc)
1694 DEBUG (6, fprintf (gc_debug_file, "Pinned roots %p-%p\n", root->start_root, root->end_root));
1695 conservatively_pin_objects_from ((void**)root->start_root, (void**)root->end_root, start_nursery, end_nursery);
1698 /* now deal with the thread stacks
1699 * in the future we should be able to conservatively scan only:
1700 * *) the cpu registers
1701 * *) the unmanaged stack frames
1702 * *) the _last_ managed stack frame
1703 * *) pointers slots in managed frames
1705 pin_thread_data (start_nursery, end_nursery);
1709 * The memory area from start_root to end_root contains pointers to objects.
1710 * Their position is precisely described by @desc (this means that the pointer
1711 * can be either NULL or the pointer to the start of an object).
1712 * This function copies them to to_space and updates them.
/*
 * Precise root scan: walks [start_root, end_root) using the encoded root
 * descriptor @desc; each slot flagged as a reference is updated via
 * copy_object.  Only the BITMAP encoding is implemented in the visible code.
 * NOTE(review): the bitmap-walk loop structure is partially elided, and
 * original line 1723 ends with a stray `\` continuation — harmless but worth
 * cleaning up in the full source.
 */
1715 precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc)
1717 switch (desc & ROOT_DESC_TYPE_MASK) {
1718 case ROOT_DESC_BITMAP:
1719 desc >>= ROOT_DESC_TYPE_SHIFT;
/* each remaining bit of desc flags whether the corresponding slot is a ref */
1721 if ((desc & 1) && *start_root) {
1722 *start_root = copy_object (*start_root, n_start, n_end);
1723 DEBUG (9, fprintf (gc_debug_file, "Overwrote root at %p with %p\n", start_root, *start_root)); \
1729 case ROOT_DESC_RUN_LEN:
1730 case ROOT_DESC_LARGE_BITMAP:
/* not implemented yet */
1731 g_assert_not_reached ();
/*
 * Returns a Fragment: pops one from fragment_freelist when available,
 * otherwise allocates a fresh one from internal memory.
 * NOTE(review): the freelist-empty branch structure and field resets are
 * elided in this listing.
 */
1736 alloc_fragment (void)
1738 Fragment *frag = fragment_freelist;
1740 fragment_freelist = frag->next;
1744 frag = get_internal_mem (sizeof (Fragment));
1750 * Allocate and setup the data structures needed to be able to allocate objects
1751 * in the nursery. The nursery is stored in nursery_section.
/*
 * One-time nursery setup: allocates the nursery memory and its GCMemSection,
 * initializes the global nursery pointers and scan_starts table, links the
 * section into section_list, and creates the initial single free fragment
 * covering the whole nursery.
 * NOTE(review): return-type line, remaining declarations and the early-return
 * body for an already-existing nursery are elided in this listing.
 */
1754 alloc_nursery (void)
1756 GCMemSection *section;
/* idempotent: bail out if the nursery was already created */
1761 if (nursery_section)
1763 DEBUG (2, fprintf (gc_debug_file, "Allocating nursery size: %d\n", nursery_size));
1764 /* later we will alloc a larger area for the nursery but only activate
1765 * what we need. The rest will be used as expansion if we have too many pinned
1766 * objects in the existing nursery.
1768 /* FIXME: handle OOM */
1769 section = get_internal_mem (sizeof (GCMemSection));
1770 data = get_os_memory (nursery_size, TRUE);
1771 nursery_start = nursery_next = data;
1772 nursery_real_end = data + nursery_size;
/* first allocation window ends at the first scan-start boundary */
1773 nursery_temp_end = data + SCAN_START_SIZE;
1774 UPDATE_HEAP_BOUNDARIES (nursery_start, nursery_real_end);
1775 total_alloc += nursery_size;
1776 DEBUG (4, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", nursery_size, total_alloc));
1777 section->data = section->next_data = data;
1778 section->size = nursery_size;
1779 section->end_data = nursery_real_end;
1780 scan_starts = nursery_size / SCAN_START_SIZE;
1781 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
1782 section->num_scan_start = scan_starts;
1784 /* add to the section list */
1785 section->next = section_list;
1786 section_list = section;
1788 nursery_section = section;
1790 /* Setup the single first large fragment */
1791 frag = alloc_fragment ();
1792 frag->fragment_start = nursery_start;
1793 frag->fragment_limit = nursery_start;
1794 frag->fragment_end = nursery_real_end;
1795 nursery_frag_real_end = nursery_real_end;
1796 /* FIXME: frag here is lost */
1800 * Update roots in the old generation. Since we currently don't have the
1801 * info from the write barriers, we just scan all the objects.
/*
 * Updates old-generation references into [start, end): without write-barrier
 * info it precisely scans every object in every non-nursery section, all LOS
 * (large) objects, and the finalizer-ready list.
 * NOTE(review): return-type line, local `p`/`fin` declarations, the
 * hole-skipping branch and loop closers are elided in this listing.
 */
1804 scan_old_generation (char *start, char* end)
1806 GCMemSection *section;
1808 LOSObject *big_object;
1811 for (section = section_list; section; section = section->next) {
/* the nursery itself is the from-space: skip it */
1812 if (section == nursery_section)
1814 DEBUG (2, fprintf (gc_debug_file, "Scan of old section: %p-%p, size: %d\n", section->data, section->next_data, (int)(section->next_data - section->data)));
1815 /* we have to deal with zeroed holes in old generation (truncated strings ...) */
1817 while (p < section->next_data) {
1822 DEBUG (8, fprintf (gc_debug_file, "Precise old object scan of %p (%s)\n", p, safe_name (p)));
1823 p = scan_object (p, start, end);
1826 /* scan the old object space, too */
1827 for (big_object = los_object_list; big_object; big_object = big_object->next) {
1828 DEBUG (5, fprintf (gc_debug_file, "Scan of big object: %p (%s), size: %d\n", big_object->data, safe_name (big_object->data), big_object->size));
1829 scan_object (big_object->data, start, end);
1831 /* scan the list of objects ready for finalization */
1832 for (fin = fin_ready_list; fin; fin = fin->next) {
1833 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
1834 fin->object = copy_object (fin->object, start, end);
/* running total of bytes made available as nursery fragments (debug/stat) */
1838 static mword fragment_total = 0;
1840 * We found a fragment of free memory in the nursery: memzero it and if
1841 * it is big enough, add it to the list of fragments that can be used for
/*
 * add_nursery_frag:
 * Zero the free range [frag_start, frag_end) and, when it is at least
 * FRAGMENT_MIN_SIZE bytes, prepend it to the nursery_fragments list so the
 * bump-pointer allocator can reuse it.  Smaller ranges are only zeroed.
 * NOTE(review): the function's opening lines (return type, braces, the
 * declaration of 'fragment') are elided in this excerpt.
 */
1845 add_nursery_frag (size_t frag_size, char* frag_start, char* frag_end)
1848 DEBUG (4, fprintf (gc_debug_file, "Found empty fragment: %p-%p, size: %d\n", frag_start, frag_end, frag_size));
1849 /* memsetting just the first chunk start is bound to provide better cache locality */
1850 memset (frag_start, 0, frag_size);
1851 /* Not worth dealing with smaller fragments: need to tune */
1852 if (frag_size >= FRAGMENT_MIN_SIZE) {
1853 fragment = alloc_fragment ();
1854 fragment->fragment_start = frag_start;
/* fragment_limit is the current allocation pointer inside the fragment */
1855 fragment->fragment_limit = frag_start;
1856 fragment->fragment_end = frag_end;
1857 fragment->next = nursery_fragments;
1858 nursery_fragments = fragment;
1859 fragment_total += frag_size;
/*
 * drain_gray_stack:
 * Process the gray area (objects copied to to_space but not yet scanned)
 * until no new gray objects appear, then handle the finalization queue and
 * disappearing links.  NOTE(review): many interior lines (timers, the
 * do/while opening, variable declarations) are elided in this excerpt.
 */
1864 drain_gray_stack (char *start_addr, char *end_addr)
1872 * We copied all the reachable objects. Now it's the time to copy
1873 * the objects that were not referenced by the roots, but by the copied objects.
1874 * we built a stack of objects pointed to by gray_start: they are
1875 * additional roots and we may add more items as we go.
1876 * We loop until gray_start == gray_objects which means no more objects have
1877 * been added. Note this is iterative: no recursion is involved.
1878 * We need to walk the LO list as well in search of marked big objects
1879 * (use a flag since this is needed only on major collections). We need to loop
1880 * here as well, so keep a counter of marked LO (increasing it in copy_object).
1883 gray_start = to_space;
1884 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
/* scanning an object may copy more objects, advancing gray_objects */
1885 while (gray_start < gray_objects) {
1886 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1887 gray_start = scan_object (gray_start, start_addr, end_addr);
1890 DEBUG (2, fprintf (gc_debug_file, "Gray stack scan: %d usecs\n", TV_ELAPSED (btv, atv)));
1891 //scan_old_generation (start_addr, end_addr);
1892 DEBUG (2, fprintf (gc_debug_file, "Old generation done\n"));
1893 /* walk the finalization queue and move also the objects that need to be
1894 * finalized: use the finalized objects as new roots so the objects they depend
1895 * on are also not reclaimed. As with the roots above, only objects in the nursery
1896 * are marked/copied.
1897 * We need a loop here, since objects ready for finalizers may reference other objects
1898 * that are fin-ready. Speedup with a flag?
/* snapshot the count so we can detect newly fin-ready objects below */
1901 fin_ready = num_ready_finalizers;
1902 finalize_in_range ((void**)start_addr, (void**)end_addr);
1904 /* drain the new stack that might have been created */
1905 DEBUG (6, fprintf (gc_debug_file, "Precise scan of gray area post fin: %p-%p, size: %d\n", gray_start, gray_objects, (int)(gray_objects - gray_start)));
1906 while (gray_start < gray_objects) {
1907 DEBUG (9, fprintf (gc_debug_file, "Precise gray object scan %p (%s)\n", gray_start, safe_name (gray_start)));
1908 gray_start = scan_object (gray_start, start_addr, end_addr);
/* repeat while finalize_in_range kept finding new fin-ready objects */
1910 } while (fin_ready != num_ready_finalizers);
1912 DEBUG (2, fprintf (gc_debug_file, "Copied to old space: %d bytes\n", (int)(gray_objects - to_space)));
/* record the new end of allocated data in the to-space section */
1913 to_space = gray_start;
1914 to_space_section->next_data = to_space;
1917 * handle disappearing links
1918 * Note we do this after checking the finalization queue because if an object
1919 * survives (at least long enough to be finalized) we don't clear the link.
1920 * This also deals with a possible issue with the monitor reclamation: with the Boehm
1921 * GC a finalized object my lose the monitor because it is cleared before the finalizer is
1924 null_link_in_range ((void**)start_addr, (void**)end_addr);
1926 DEBUG (2, fprintf (gc_debug_file, "Finalize queue handling scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/* number of pinned objects found in the previous collection (debug aid) */
1929 static int last_num_pinned = 0;
/*
 * build_nursery_fragments:
 * Walk the pin queue entries [start_pin, end_pin): unpin each object,
 * register it as a scan start, and turn every gap between consecutive
 * pinned objects (and after the last one) into a reusable nursery fragment
 * via add_nursery_frag().  NOTE(review): several lines are elided in this
 * excerpt, including the guards that presumably skip zero-sized gaps.
 */
1932 build_nursery_fragments (int start_pin, int end_pin)
1934 char *frag_start, *frag_end;
1938 /* FIXME: handle non-NULL fragment_freelist */
/* recycle the old fragment list; fragments are rebuilt from scratch */
1939 fragment_freelist = nursery_fragments;
1940 nursery_fragments = NULL;
1941 frag_start = nursery_start;
1943 /* clear scan starts */
1944 memset (nursery_section->scan_starts, 0, nursery_section->num_scan_start * sizeof (gpointer));
1945 for (i = start_pin; i < end_pin; ++i) {
/* each pinned object ends the current free gap */
1946 frag_end = pin_queue [i];
1947 /* remove the pin bit from pinned objects */
1948 unpin_object (frag_end);
1949 nursery_section->scan_starts [((char*)frag_end - (char*)nursery_section->data)/SCAN_START_SIZE] = frag_end;
1950 frag_size = frag_end - frag_start;
1952 add_nursery_frag (frag_size, frag_start, frag_end);
/* skip over the pinned object, rounded up to the allocation alignment */
1953 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
1954 frag_size += ALLOC_ALIGN - 1;
1955 frag_size &= ~(ALLOC_ALIGN - 1);
1956 frag_start = (char*)pin_queue [i] + frag_size;
1958 nursery_last_pinned_end = frag_start;
/* the tail of the nursery after the last pinned object is also free */
1959 frag_end = nursery_real_end;
1960 frag_size = frag_end - frag_start;
1962 add_nursery_frag (frag_size, frag_start, frag_end);
/* no usable fragment at all: the nursery is completely pinned */
1963 if (!nursery_fragments) {
1964 DEBUG (1, fprintf (gc_debug_file, "Nursery fully pinned (%d)\n", end_pin - start_pin));
1965 for (i = start_pin; i < end_pin; ++i) {
1966 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
1972 /* FIXME: later reduce code duplication here with the above
1973 * We don't keep track of section fragments for non-nursery sections yet, so
/*
 * build_section_fragments:
 * Like build_nursery_fragments(), but for a major-heap section: unpin the
 * section's pinned objects, rebuild its scan starts and zero the gaps
 * between pinned objects.  The gaps are only memset (not tracked as
 * allocatable fragments) and section->next_data is advanced past the last
 * pinned object.  NOTE(review): some lines are elided in this excerpt.
 */
1977 build_section_fragments (GCMemSection *section)
1980 char *frag_start, *frag_end;
1983 /* clear scan starts */
1984 memset (section->scan_starts, 0, section->num_scan_start * sizeof (gpointer));
1985 frag_start = section->data;
/* reset and recompute next_data from the surviving (pinned) objects */
1986 section->next_data = section->data;
1987 for (i = section->pin_queue_start; i < section->pin_queue_end; ++i) {
1988 frag_end = pin_queue [i];
1989 /* remove the pin bit from pinned objects */
1990 unpin_object (frag_end);
1991 section->scan_starts [((char*)frag_end - (char*)section->data)/SCAN_START_SIZE] = frag_end;
1992 frag_size = frag_end - frag_start;
/* zero the dead gap so future scans see no stale object headers */
1994 memset (frag_start, 0, frag_size);
/* advance past the pinned object, rounded to the allocation alignment */
1995 frag_size = safe_object_get_size ((MonoObject*)pin_queue [i]);
1996 frag_size += ALLOC_ALIGN - 1;
1997 frag_size &= ~(ALLOC_ALIGN - 1);
1998 frag_start = (char*)pin_queue [i] + frag_size;
1999 section->next_data = MAX (section->next_data, frag_start);
/* zero the tail of the section after the last pinned object */
2001 frag_end = section->end_data;
2002 frag_size = frag_end - frag_start;
2004 memset (frag_start, 0, frag_size);
2008 * Collect objects in the nursery.
/*
 * collect_nursery:
 * Perform a minor collection: make sure to_space can hold the worst case
 * (the whole live nursery), pin conservatively-referenced objects, copy
 * reachable young objects to the old generation (remsets, pinned objects,
 * registered roots), drain the gray stack, then rebuild the nursery free
 * fragments.  The world must already be stopped by the caller (see the
 * comment at the elided original line 2045).
 * NOTE(review): many lines (timer reads, declarations, early-out checks)
 * are elided in this excerpt; 'requested_size' is not used in any visible
 * statement.
 */
2011 collect_nursery (size_t requested_size)
2013 GCMemSection *section;
2014 size_t max_garbage_amount;
2021 nursery_next = MAX (nursery_next, nursery_last_pinned_end);
2022 /* FIXME: optimize later to use the higher address where an object can be present */
2023 nursery_next = MAX (nursery_next, nursery_real_end);
2025 DEBUG (1, fprintf (gc_debug_file, "Start nursery collection %d %p-%p, size: %d\n", num_minor_gcs, nursery_start, nursery_next, (int)(nursery_next - nursery_start)));
/* worst case: everything in the nursery survives and must be copied */
2026 max_garbage_amount = nursery_next - nursery_start;
2028 * not enough room in the old generation to store all the possible data from
2029 * the nursery in a single continuous space.
2030 * We reset to_space if we allocated objects in degraded mode.
2032 if (to_space_section)
2033 to_space = gray_objects = to_space_section->next_data;
2034 if ((to_space_end - to_space) < max_garbage_amount) {
2035 section = alloc_section (nursery_section->size * 4);
2036 g_assert (nursery_section->size >= max_garbage_amount);
2037 to_space = gray_objects = section->next_data;
2038 to_space_end = section->end_data;
2039 to_space_section = section;
2041 DEBUG (2, fprintf (gc_debug_file, "To space setup: %p-%p in section %p\n", to_space, to_space_end, to_space_section));
2042 nursery_section->next_data = nursery_next;
2045 /* world must be stopped already */
2047 /* pin from pinned handles */
2048 pin_from_roots (nursery_start, nursery_next);
2049 /* identify pinned objects */
2050 optimize_pin_queue (0);
2051 next_pin_slot = pin_objects_from_addresses (nursery_section, pin_queue, pin_queue + next_pin_slot, nursery_start, nursery_next);
2053 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2054 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2057 * walk all the roots and copy the young objects to the old generation,
2058 * starting from to_space
/* the remembered sets give us the old->young pointers to update */
2061 scan_from_remsets (nursery_start, nursery_next);
2062 /* we don't have complete write barrier yet, so we scan all the old generation sections */
2064 DEBUG (2, fprintf (gc_debug_file, "Old generation scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2065 /* FIXME: later scan also alloc_pinned objects */
2067 /* the pinned objects are roots */
2068 for (i = 0; i < next_pin_slot; ++i) {
2069 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2070 scan_object (pin_queue [i], nursery_start, nursery_next);
2072 /* registered roots, this includes static fields */
2073 for (i = 0; i < roots_hash_size; ++i) {
2074 for (root = roots_hash [i]; root; root = root->next) {
2075 /* if desc is non-null it has precise info */
/* roots without a descriptor were already handled conservatively */
2076 if (!root->root_desc)
2078 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2079 precisely_scan_objects_from ((void**)root->start_root, root->end_root, nursery_start, nursery_next, root->root_desc);
2083 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (atv, btv)));
/* transitively copy everything reachable from what we copied so far */
2085 drain_gray_stack (nursery_start, nursery_next);
2087 /* walk the pin_queue, build up the fragment list of free memory, unmark
2088 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2091 build_nursery_fragments (0, next_pin_slot);
2093 DEBUG (2, fprintf (gc_debug_file, "Fragment creation: %d usecs, %d bytes available\n", TV_ELAPSED (btv, atv), fragment_total));
2095 /* prepare the pin queue for the next collection */
2096 last_num_pinned = next_pin_slot;
/* wake the finalizer thread if this collection made objects fin-ready */
2098 if (fin_ready_list) {
2099 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2100 mono_gc_finalize_notify ();
/*
 * major_collection:
 * Collect the whole heap by copying: pin conservatively-referenced objects
 * in every section, in large objects and in pinned chunks; allocate a
 * to-space big enough for all sections; copy survivors from the precise
 * roots, pinned objects and fin-ready objects; then sweep large objects,
 * pinned chunks and empty sections, and rebuild fragment lists.
 * The world must already be stopped by the caller.
 * NOTE(review): many lines (timers, declarations, remset clearing, loop
 * bodies' braces) are elided in this excerpt; comments describe only the
 * visible statements.
 */
2105 major_collection (void)
2107 GCMemSection *section, *prev_section;
2108 LOSObject *bigobj, *prevbo;
2116 /* FIXME: only use these values for the precise scan
2117 * note that to_space pointers should be excluded anyway...
/* (NULL, -1) means "the whole address space" for the copy range */
2119 char *heap_start = NULL;
2120 char *heap_end = (char*)-1;
2121 size_t copy_space_required = 0;
2124 DEBUG (1, fprintf (gc_debug_file, "Start major collection %d\n", num_major_gcs));
2127 * FIXME: implement Mark/Compact
2128 * Until that is done, we can just apply mostly the same alg as for the nursery:
2129 * this means we need a big section to potentially copy all the other sections, so
2130 * it is not ideal specially with large heaps.
/* escape hatch: degrade major collections to minor ones for debugging */
2132 if (g_getenv ("MONO_GC_NO_MAJOR")) {
2133 collect_nursery (0);
2136 /* FIXME: make sure the nursery next_data ptr is updated */
2137 nursery_section->next_data = nursery_real_end;
2138 /* we should also coalesce scanning from sections close to each other
2139 * and deal with pointers outside of the sections later.
2141 /* The remsets are not useful for a major collection */
2143 /* world must be stopped already */
2145 DEBUG (6, fprintf (gc_debug_file, "Pinning from sections\n"));
2146 for (section = section_list; section; section = section->next) {
2147 section->pin_queue_start = count = next_pin_slot;
2148 pin_from_roots (section->data, section->next_data);
2149 if (count != next_pin_slot) {
2151 optimize_pin_queue (count);
2152 DEBUG (6, fprintf (gc_debug_file, "Found %d pinning addresses in section %p (%d-%d)\n", next_pin_slot - count, section, count, next_pin_slot));
2153 reduced_to = pin_objects_from_addresses (section, pin_queue + count, pin_queue + next_pin_slot, section->data, section->next_data);
2154 section->pin_queue_end = next_pin_slot = count + reduced_to;
/* worst case to-space need: every section's data may survive */
2156 copy_space_required += (char*)section->next_data - (char*)section->data;
2158 /* identify possible pointers to the inside of large objects */
2159 DEBUG (6, fprintf (gc_debug_file, "Pinning from large objects\n"));
2160 for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
2161 count = next_pin_slot;
2162 pin_from_roots (bigobj->data, (char*)bigobj->data + bigobj->size);
2163 /* FIXME: this is only valid until we don't optimize the pin queue midway */
/* LOS objects never move: just mark them and drop the queue entries */
2164 if (next_pin_slot != count) {
2165 next_pin_slot = count;
2166 pin_object (bigobj->data);
2167 DEBUG (6, fprintf (gc_debug_file, "Marked large object %p (%s) size: %d from roots\n", bigobj->data, safe_name (bigobj->data), bigobj->size));
2170 /* look for pinned addresses for pinned-alloc objects */
2171 DEBUG (6, fprintf (gc_debug_file, "Pinning from pinned-alloc objects\n"));
2172 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2173 count = next_pin_slot;
2174 pin_from_roots (chunk->start_data, (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE);
2175 /* FIXME: this is only valid until we don't optimize the pin queue midway */
2176 if (next_pin_slot != count) {
2177 mark_pinned_from_addresses (chunk, pin_queue + count, pin_queue + next_pin_slot);
2178 next_pin_slot = count;
2183 DEBUG (2, fprintf (gc_debug_file, "Finding pinned pointers: %d in %d usecs\n", next_pin_slot, TV_ELAPSED (atv, btv)));
2184 DEBUG (4, fprintf (gc_debug_file, "Start scan with %d pinned objects\n", next_pin_slot));
2186 /* allocate the big to space */
2187 DEBUG (4, fprintf (gc_debug_file, "Allocate tospace for size: %d\n", copy_space_required));
2188 section = alloc_section (copy_space_required);
2189 to_space = gray_objects = section->next_data;
2190 to_space_end = section->end_data;
2191 to_space_section = section;
2193 /* the old generation doesn't need to be scanned (no remembered sets or card
2194 * table needed either): the only objects that must survive are those pinned and
2195 * those referenced by the precise roots.
2196 * mark any section without pinned objects, so we can free it since we will be able to
2197 * move all the objects.
2199 /* the pinned objects are roots */
2200 for (i = 0; i < next_pin_slot; ++i) {
2201 DEBUG (6, fprintf (gc_debug_file, "Precise object scan %d of pinned %p (%s)\n", i, pin_queue [i], safe_name (pin_queue [i])));
2202 scan_object (pin_queue [i], heap_start, heap_end);
2204 /* registered roots, this includes static fields */
2205 for (i = 0; i < roots_hash_size; ++i) {
2206 for (root = roots_hash [i]; root; root = root->next) {
2207 /* if desc is non-null it has precise info */
2208 if (!root->root_desc)
2210 DEBUG (6, fprintf (gc_debug_file, "Precise root scan %p-%p (desc: %p)\n", root->start_root, root->end_root, (void*)root->root_desc));
2211 precisely_scan_objects_from ((void**)root->start_root, root->end_root, heap_start, heap_end, root->root_desc);
2214 /* scan the list of objects ready for finalization */
2215 for (fin = fin_ready_list; fin; fin = fin->next) {
2216 DEBUG (5, fprintf (gc_debug_file, "Scan of fin ready object: %p (%s)\n", fin->object, safe_name (fin->object)));
2217 fin->object = copy_object (fin->object, heap_start, heap_end);
2220 DEBUG (2, fprintf (gc_debug_file, "Root scan: %d usecs\n", TV_ELAPSED (btv, atv)));
2222 /* all the objects in the heap */
2223 drain_gray_stack (heap_start, heap_end);
2225 /* sweep the big objects list */
2227 for (bigobj = los_object_list; bigobj;) {
/* pinned (i.e. marked) large objects survive: just clear the mark */
2228 if (object_is_pinned (bigobj->data)) {
2229 unpin_object (bigobj->data);
2232 /* not referenced anywhere, so we can free it */
/* unlink from the list (head vs. interior node) before freeing */
2234 prevbo->next = bigobj->next;
2236 los_object_list = bigobj->next;
2238 bigobj = bigobj->next;
2239 free_large_object (to_free);
2243 bigobj = bigobj->next;
2245 /* unpin objects from the pinned chunks and free the unmarked ones */
2246 sweep_pinned_objects ();
2248 /* free the unused sections */
2249 prev_section = NULL;
2250 for (section = section_list; section;) {
2251 /* to_space doesn't need handling here and the nursery is special */
2252 if (section == to_space_section || section == nursery_section) {
2253 prev_section = section;
2254 section = section->next;
2257 /* no pinning object, so the section is free */
/* empty pin range => everything in the section was copied out */
2258 if (section->pin_queue_start == section->pin_queue_end) {
2259 GCMemSection *to_free;
2261 prev_section->next = section->next;
2263 section_list = section->next;
2265 section = section->next;
2266 free_mem_section (to_free);
2269 DEBUG (6, fprintf (gc_debug_file, "Section %p has still pinned objects (%d)\n", section, section->pin_queue_end - section->pin_queue_start));
2270 build_section_fragments (section);
2272 prev_section = section;
2273 section = section->next;
2276 /* walk the pin_queue, build up the fragment list of free memory, unmark
2277 * pinned objects as we go, memzero() the empty fragments so they are ready for the
2280 build_nursery_fragments (nursery_section->pin_queue_start, nursery_section->pin_queue_end);
2282 /* prepare the pin queue for the next collection */
2284 if (fin_ready_list) {
2285 DEBUG (4, fprintf (gc_debug_file, "Finalizer-thread wakeup: ready %d\n", num_ready_finalizers));
2286 mono_gc_finalize_notify ();
2291 * Allocate a new section of memory to be used as old generation.
/*
 * alloc_section:
 * Allocate an old-generation section of at least @size bytes (rounded up
 * to the page size) from the OS, set up its scan-start table and link it
 * into section_list.  next_section_size grows geometrically (doubled every
 * 4 allocations up to max_section_size) so large heaps need fewer sections.
 * Returns the new section (return statement elided in this excerpt).
 * NOTE(review): section->data and section->size are assigned twice in the
 * visible code (original lines 2315-2316 and 2321-2322) — looks redundant;
 * confirm against the full file before touching it.
 */
2293 static GCMemSection*
2294 alloc_section (size_t size)
2296 GCMemSection *section;
2299 size_t new_size = next_section_size;
2301 if (size > next_section_size) {
/* round the requested size up to a whole number of pages */
2303 new_size += pagesize - 1;
2304 new_size &= ~(pagesize - 1);
2306 section_size_used++;
2307 if (section_size_used > 3) {
2308 section_size_used = 0;
2309 next_section_size *= 2;
2310 if (next_section_size > max_section_size)
2311 next_section_size = max_section_size;
2313 section = get_internal_mem (sizeof (GCMemSection));
2314 data = get_os_memory (new_size, TRUE);
2315 section->data = section->next_data = data;
2316 section->size = new_size;
2317 section->end_data = data + new_size;
2318 UPDATE_HEAP_BOUNDARIES (data, section->end_data);
2319 total_alloc += new_size;
2320 DEBUG (2, fprintf (gc_debug_file, "Expanding heap size: %d, total: %d\n", new_size, total_alloc));
2321 section->data = data;
2322 section->size = new_size;
/* one scan-start slot per SCAN_START_SIZE bytes of section data */
2323 scan_starts = new_size / SCAN_START_SIZE;
2324 section->scan_starts = get_internal_mem (sizeof (char*) * scan_starts);
2325 section->num_scan_start = scan_starts;
2327 /* add to the section list */
2328 section->next = section_list;
2329 section_list = section;
/*
 * free_mem_section:
 * Return a section's memory to the OS, free its metadata and update the
 * global allocation accounting.  The caller must have already unlinked
 * @section from section_list.
 * NOTE(review): section->scan_starts is not freed in the visible code —
 * it may be released elsewhere or by free_internal_mem conventions;
 * confirm against the full file.
 */
2335 free_mem_section (GCMemSection *section)
2337 char *data = section->data;
2338 size_t size = section->size;
2339 DEBUG (2, fprintf (gc_debug_file, "Freed section %p, size %d\n", data, size));
2340 free_os_memory (data, size);
2341 free_internal_mem (section);
2342 total_alloc -= size;
2346 * When deciding if it's better to collect or to expand, keep track
2347 * of how much garbage was reclaimed with the last collection: if it's too
2349 * This is called when we could not allocate a small object.
/*
 * minor_collect_or_expand_inner:
 * Slow path of small-object allocation: run a nursery collection (after
 * lazily creating the nursery if needed — that branch is elided here) and
 * then look for a fragment able to satisfy @size.  If none is found the
 * nursery was too fragmented; the visible code only logs the pinned
 * objects (recovery, e.g. degraded allocation, is in elided lines).
 * noinline keeps this cold path out of the inlined fast allocator.
 */
2351 static void __attribute__((noinline))
2352 minor_collect_or_expand_inner (size_t size)
2354 int do_minor_collection = 1;
2356 if (!nursery_section) {
2360 if (do_minor_collection) {
2362 collect_nursery (size);
2363 DEBUG (2, fprintf (gc_debug_file, "Heap size: %d, LOS size: %d\n", total_alloc, los_memory_usage));
2365 /* this also sets the proper pointers for the next allocation */
2366 if (!search_fragment_for_size (size)) {
2368 /* TypeBuilder and MonoMethod are killing mcs with fragmentation */
2369 DEBUG (1, fprintf (gc_debug_file, "nursery collection didn't find enough room for %d alloc (%d pinned)", size, last_num_pinned));
2370 for (i = 0; i < last_num_pinned; ++i) {
2371 DEBUG (3, fprintf (gc_debug_file, "Bastard pinning obj %p (%s), size: %d\n", pin_queue [i], safe_name (pin_queue [i]), safe_object_get_size (pin_queue [i])));
2376 //report_internal_mem_usage ();
2380 * ######################################################################
2381 * ######## Memory allocation from the OS
2382 * ######################################################################
2383 * This section of code deals with getting memory from the OS and
2384 * allocating memory for GC-internal data structures.
2385 * Internal memory can be handled with a freelist for small objects.
2388 #ifndef MAP_ANONYMOUS
2389 #define MAP_ANONYMOUS MAP_ANON
2393 * Allocate a big chunk of memory from the OS (usually 64KB to several megabytes).
2394 * This must not require any lock.
/*
 * get_os_memory:
 * mmap() @size bytes (rounded up to the page size), readable/writable when
 * @activate is true, PROT_NONE otherwise.  If anonymous mapping fails,
 * fall back to mapping /dev/zero.  The final error handling and return
 * are elided in this excerpt.
 */
2397 get_os_memory (size_t size, int activate)
2400 unsigned long prot_flags = activate? PROT_READ|PROT_WRITE: PROT_NONE;
2402 size += pagesize - 1;
2403 size &= ~(pagesize - 1);
2404 ptr = mmap (0, size, prot_flags, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
/* MAP_FAILED: retry with a /dev/zero mapping for older systems */
2405 if (ptr == (void*)-1) {
2406 int fd = open ("/dev/zero", O_RDONLY);
2408 ptr = mmap (0, size, prot_flags, MAP_PRIVATE, fd, 0);
2411 if (ptr == (void*)-1) {
2419 * Free the memory returned by get_os_memory (), returning it to the OS.
/* free_os_memory: unmap a region previously obtained via get_os_memory().
 * @size should match the (page-rounded) mapped size. */
2422 free_os_memory (void *addr, size_t size)
2424 munmap (addr, size);
/*
 * report_pinned_chunk:
 * Debug helper: print to stdout a summary of one pinned chunk (@seq is
 * just a sequence number for the listing): total/used pages and, per
 * freelist slot, the number of free items and the resulting free memory.
 * NOTE(review): the free-page counting and freelist-walking statements are
 * partly elided in this excerpt.
 */
2431 report_pinned_chunk (PinnedChunk *chunk, int seq) {
2433 int i, free_pages, num_free, free_mem;
/* a page_sizes entry of 0 means the page was never handed to a freelist */
2435 for (i = 0; i < chunk->num_pages; ++i) {
2436 if (!chunk->page_sizes [i])
2439 printf ("Pinned chunk %d at %p, size: %d, pages: %d, free: %d\n", seq, chunk, chunk->num_pages * FREELIST_PAGESIZE, chunk->num_pages, free_pages);
2440 free_mem = FREELIST_PAGESIZE * free_pages;
2441 for (i = 0; i < FREELIST_NUM_SLOTS; ++i) {
2442 if (!chunk->free_list [i])
2445 p = chunk->free_list [i];
2450 printf ("\tfree list of size %d, %d items\n", freelist_sizes [i], num_free);
2451 free_mem += freelist_sizes [i] * num_free;
2453 printf ("\tfree memory in chunk: %d\n", free_mem);
/*
 * report_internal_mem_usage:
 * Debug helper: dump a per-chunk report for both the GC-internal chunk
 * list and the pinned-object chunk list (see report_pinned_chunk()).
 */
2460 report_internal_mem_usage (void) {
2463 printf ("Internal memory usage:\n");
2465 for (chunk = internal_chunk_list; chunk; chunk = chunk->next) {
2466 report_pinned_chunk (chunk, i++);
2468 printf ("Pinned memory usage:\n");
2470 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2471 report_pinned_chunk (chunk, i++);
2476 * the array of pointers from @start to @end contains conservative
2477 * pointers to objects inside @chunk: mark each referenced object
/*
 * mark_pinned_from_addresses:
 * For each conservative pointer in [start, end) that falls inside @chunk,
 * round it down to the start of its fixed-size slot and mark the object
 * there as pinned/live (the actual pin call is elided in this excerpt).
 * Slots whose first word points back inside the chunk are freelist links,
 * not objects, and are skipped.
 */
2481 mark_pinned_from_addresses (PinnedChunk *chunk, void **start, void **end)
2483 for (; start < end; start++) {
2484 char *addr = *start;
2485 int offset = (char*)addr - (char*)chunk;
2486 int page = offset / FREELIST_PAGESIZE;
/* page 0 also holds the chunk bookkeeping, so its data starts later */
2487 int obj_offset = page == 0? offset - ((char*)chunk->start_data - (char*)chunk): offset % FREELIST_PAGESIZE;
2488 int slot_size = chunk->page_sizes [page];
2490 /* the page is not allocated */
2493 /* would be faster if we restrict the sizes to power of two,
2494 * but that's a waste of memory: need to measure. it could reduce
2495 * fragmentation since there are less pages needed, if for example
2496 * someone interns strings of each size we end up with one page per
2497 * interned string (still this is just ~40 KB): with more fine-grained sizes
2498 * this increases the number of used pages.
/* round down to the containing slot boundary */
2501 obj_offset /= slot_size;
2502 obj_offset *= slot_size;
2503 addr = (char*)chunk->start_data + obj_offset;
2505 obj_offset /= slot_size;
2506 obj_offset *= slot_size;
2507 addr = (char*)chunk + page * FREELIST_PAGESIZE + obj_offset;
2510 /* if the vtable is inside the chunk it's on the freelist, so skip */
2511 if (*ptr && (*ptr < (void*)chunk->start_data || *ptr > (void*)((char*)chunk + chunk->num_pages * FREELIST_PAGESIZE))) {
2513 DEBUG (6, fprintf (gc_debug_file, "Marked pinned object %p (%s) from roots\n", addr, safe_name (addr)));
/*
 * sweep_pinned_objects:
 * Walk every slot of every page in every pinned chunk.  Slots whose first
 * word points inside the chunk are freelist links and are skipped; real
 * objects that are still pinned get unpinned (survive), unmarked ones are
 * dead and should be returned to the freelist (still a FIXME below).
 * NOTE(review): several lines (declarations, unpin call, loop advance)
 * are elided in this excerpt.
 * Fix: corrected the debug-message typo "ranhe" -> "range".
 */
2519 sweep_pinned_objects (void)
2526 for (chunk = pinned_chunk_list; chunk; chunk = chunk->next) {
2527 end_chunk = (char*)chunk + chunk->num_pages * FREELIST_PAGESIZE;
2528 DEBUG (6, fprintf (gc_debug_file, "Sweeping pinned chunk %p (range: %p-%p)\n", chunk, chunk->start_data, end_chunk));
2529 for (i = 0; i < chunk->num_pages; ++i) {
/* page_sizes[i] == 0 means the page is unused: nothing to sweep */
2530 obj_size = chunk->page_sizes [i];
/* page 0 starts after the chunk bookkeeping data */
2533 p = i? (char*)chunk + i * FREELIST_PAGESIZE: chunk->start_data;
2534 endp = i? p + FREELIST_PAGESIZE: (char*)chunk + FREELIST_PAGESIZE;
2535 DEBUG (6, fprintf (gc_debug_file, "Page %d (size: %d, range: %p-%p)\n", i, obj_size, p, endp));
2536 while (p + obj_size <= endp) {
2538 DEBUG (9, fprintf (gc_debug_file, "Considering %p (vtable: %p)\n", ptr, *ptr));
2539 /* if the first word (the vtable) is outside the chunk we have an object */
2540 if (*ptr && (*ptr < (void*)chunk || *ptr >= end_chunk)) {
2541 if (object_is_pinned (ptr)) {
2543 DEBUG (6, fprintf (gc_debug_file, "Unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2545 /* FIXME: add to freelist */
2546 DEBUG (6, fprintf (gc_debug_file, "Going to free unmarked pinned object %p (%s)\n", ptr, safe_name (ptr)));
2556 * Find the slot number in the freelist for memory chunks that
2557 * can contain @size objects.
/*
 * slot_for_size:
 * Linear scan of freelist_sizes for the first slot whose chunk size is
 * >= @size (the 'return slot;' is elided in this excerpt).  Aborts if
 * @size exceeds the largest freelist size — the caller must have checked.
 */
2560 slot_for_size (size_t size)
2563 /* do a binary search or lookup table later. */
2564 for (slot = 0; slot < FREELIST_NUM_SLOTS; ++slot) {
2565 if (freelist_sizes [slot] >= size)
2568 g_assert_not_reached ();
2573 * Build a free list for @size memory chunks from the memory area between
2574 * start_page and end_page.
/*
 * build_freelist:
 * Carve [start_page, end_page) into @size-byte slots and thread them into
 * a singly linked freelist stored in chunk->free_list[slot]: each free
 * slot's first word points to the next free slot.
 * NOTE(review): the initialization of 'p' (presumably from start_page) and
 * the terminating *p = NULL are elided in this excerpt.
 */
2577 build_freelist (PinnedChunk *chunk, int slot, int size, char *start_page, char *end_page)
2581 /*g_print ("building freelist for slot %d, size %d in %p\n", slot, size, chunk);*/
/* 'end' is the last address where a full slot could still start */
2583 end = (void**)(end_page - size);
2584 g_assert (!chunk->free_list [slot]);
2585 chunk->free_list [slot] = p;
2586 while ((char*)p + size <= end) {
/* link this slot to the next one */
2588 *p = (void*)((char*)p + size);
2592 /*g_print ("%d items created, max: %d\n", count, (end_page - start_page) / size);*/
/*
 * alloc_pinned_chunk:
 * Allocate a new PinnedChunk of at least @size bytes (page-rounded, with a
 * minimum of 2 * PINNED_CHUNK_MIN_SIZE) from the OS, lay out its
 * bookkeeping arrays (page_sizes, free_list) at the start of the chunk,
 * and seed page 0 with a freelist of PINNED_FIRST_SLOT_SIZE slots.
 * The return statement is elided in this excerpt.
 */
2596 alloc_pinned_chunk (size_t size)
2601 size += pagesize; /* at least one page */
2602 size += pagesize - 1;
2603 size &= ~(pagesize - 1);
2604 if (size < PINNED_CHUNK_MIN_SIZE * 2)
2605 size = PINNED_CHUNK_MIN_SIZE * 2;
2606 chunk = get_os_memory (size, TRUE);
2607 UPDATE_HEAP_BOUNDARIES (chunk, ((char*)chunk + size));
2608 total_alloc += size;
2610 /* setup the bookeeping fields */
2611 chunk->num_pages = size / FREELIST_PAGESIZE;
/* page_sizes[] lives right after the fixed PinnedChunk header */
2612 offset = G_STRUCT_OFFSET (PinnedChunk, data);
2613 chunk->page_sizes = (void*)((char*)chunk + offset);
2614 offset += sizeof (int) * chunk->num_pages;
2615 offset += ALLOC_ALIGN - 1;
2616 offset &= ~(ALLOC_ALIGN - 1);
2617 chunk->free_list = (void*)((char*)chunk + offset);
2618 offset += sizeof (void*) * FREELIST_NUM_SLOTS;
2619 offset += ALLOC_ALIGN - 1;
2620 offset &= ~(ALLOC_ALIGN - 1);
/* object data starts after the (aligned) bookkeeping arrays */
2621 chunk->start_data = (void*)((char*)chunk + offset);
2623 /* allocate the first page to the freelist */
2624 chunk->page_sizes [0] = PINNED_FIRST_SLOT_SIZE;
2625 build_freelist (chunk, slot_for_size (PINNED_FIRST_SLOT_SIZE), PINNED_FIRST_SLOT_SIZE, chunk->start_data, ((char*)chunk + FREELIST_PAGESIZE));
2626 DEBUG (4, fprintf (gc_debug_file, "Allocated pinned chunk %p, size: %d\n", chunk, size));
/* keep the global pinned-address range up to date for quick checks */
2627 min_pinned_chunk_addr = MIN (min_pinned_chunk_addr, (char*)chunk->start_data);
2628 max_pinned_chunk_addr = MAX (max_pinned_chunk_addr, ((char*)chunk + size));
2632 /* assumes freelist for slot is empty, so try to alloc a new page */
/*
 * get_chunk_freelist:
 * Pop an item from chunk->free_list[slot]; if the list is empty, find an
 * unused page in the chunk, assign it to this slot size, build a freelist
 * on it and pop from that.  Returns NULL (in elided lines, presumably)
 * when the chunk has no free pages left.
 */
2634 get_chunk_freelist (PinnedChunk *chunk, int slot)
2638 p = chunk->free_list [slot];
/* fast path: the freelist already has an item */
2640 chunk->free_list [slot] = *p;
2643 for (i = 0; i < chunk->num_pages; ++i) {
/* page already assigned to some slot size: keep looking */
2645 if (chunk->page_sizes [i])
2647 size = freelist_sizes [slot];
2648 chunk->page_sizes [i] = size;
2649 build_freelist (chunk, slot, size, (char*)chunk + FREELIST_PAGESIZE * i, (char*)chunk + FREELIST_PAGESIZE * (i + 1));
2653 p = chunk->free_list [slot];
2655 chunk->free_list [slot] = *p;
/*
 * alloc_from_freelist:
 * Allocate @size bytes of pinned (non-moving) memory: try each existing
 * pinned chunk's freelist for the matching slot, then try growing a
 * freelist within an existing chunk, and finally allocate a brand-new
 * chunk.  The return statements are elided in this excerpt.
 */
2662 alloc_from_freelist (size_t size)
2666 PinnedChunk *pchunk;
2667 slot = slot_for_size (size);
2668 /*g_print ("using slot %d for size %d (slot size: %d)\n", slot, size, freelist_sizes [slot]);*/
2669 g_assert (size <= freelist_sizes [slot]);
/* first pass: pop directly from an existing non-empty freelist */
2670 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2671 void **p = pchunk->free_list [slot];
2673 /*g_print ("found freelist for slot %d in chunk %p, returning %p, next %p\n", slot, pchunk, p, *p);*/
2674 pchunk->free_list [slot] = *p;
/* second pass: ask each chunk to dedicate a fresh page to this slot */
2678 for (pchunk = pinned_chunk_list; pchunk; pchunk = pchunk->next) {
2679 res = get_chunk_freelist (pchunk, slot);
/* all chunks full: grow the pinned space with a new chunk */
2683 pchunk = alloc_pinned_chunk (size);
2684 /* FIXME: handle OOM */
2685 pchunk->next = pinned_chunk_list;
2686 pinned_chunk_list = pchunk;
2687 res = get_chunk_freelist (pchunk, slot);
2691 /* used for the GC-internal data structures */
2692 /* FIXME: add support for bigger sizes by allocating more than one page
/*
 * get_internal_mem:
 * Allocate zeroed memory for GC-internal data structures.
 * NOTE(review): the visible 'return calloc (1, size);' makes everything
 * after it unreachable — the freelist-based implementation below is
 * presumably disabled on purpose (or guarded by an #ifdef in elided
 * lines); confirm against the full file.
 */
2696 get_internal_mem (size_t size)
2698 return calloc (1, size);
2701 PinnedChunk *pchunk;
2702 slot = slot_for_size (size);
2703 g_assert (size <= freelist_sizes [slot]);
/* same three-step strategy as alloc_from_freelist(), but on the
 * internal_chunk_list so GC metadata never mixes with pinned objects */
2704 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2705 void **p = pchunk->free_list [slot];
2707 pchunk->free_list [slot] = *p;
2711 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2712 res = get_chunk_freelist (pchunk, slot);
2716 pchunk = alloc_pinned_chunk (size);
2717 /* FIXME: handle OOM */
2718 pchunk->next = internal_chunk_list;
2719 internal_chunk_list = pchunk;
2720 res = get_chunk_freelist (pchunk, slot);
/*
 * free_internal_mem:
 * Return @addr (obtained from get_internal_mem()) to the freelist of the
 * internal chunk that contains it.  Aborts if the address does not belong
 * to any internal chunk.
 * NOTE(review): get_internal_mem() as visible here returns calloc()ed
 * memory, while this walks internal_chunk_list — elided lines (or an
 * #ifdef) presumably keep the two paths consistent; confirm against the
 * full file.
 */
2725 free_internal_mem (void *addr)
2729 PinnedChunk *pchunk;
2730 for (pchunk = internal_chunk_list; pchunk; pchunk = pchunk->next) {
2731 /*printf ("trying to free %p in %p (pages: %d)\n", addr, pchunk, pchunk->num_pages);*/
2732 if (addr >= (void*)pchunk && (char*)addr < (char*)pchunk + pchunk->num_pages * FREELIST_PAGESIZE) {
2733 int offset = (char*)addr - (char*)pchunk;
2734 int page = offset / FREELIST_PAGESIZE;
/* the page's slot size tells us which freelist the block belongs to */
2735 int slot = slot_for_size (pchunk->page_sizes [page]);
/* push the block back on the freelist */
2737 *p = pchunk->free_list [slot];
2738 pchunk->free_list [slot] = p;
2742 printf ("free of %p failed\n", addr);
2743 g_assert_not_reached ();
2747 * ######################################################################
2748 * ######## Object allocation
2749 * ######################################################################
2750 * This section of code deals with allocating memory for objects.
2751 * There are several ways:
2752 * *) allocate large objects
2753 * *) allocate normal objects
2754 * *) fast lock-free allocation
2755 * *) allocation of pinned objects
/*
 * free_large_object:
 * Release a large (LOS) object: update the LOS accounting, recompute the
 * page-rounded size of the header+payload mapping and return it to the OS.
 * The caller must have already unlinked @obj from los_object_list.
 */
2759 free_large_object (LOSObject *obj)
2761 size_t size = obj->size;
2762 DEBUG (4, fprintf (gc_debug_file, "Freed large object %p, size %d\n", obj->data, obj->size));
2764 los_memory_usage -= size;
/* reconstruct the mapped size: header + payload, rounded up to pages */
2765 size += sizeof (LOSObject);
2766 size += pagesize - 1;
2767 size &= ~(pagesize - 1);
2768 total_alloc -= size;
2770 free_os_memory (obj, size);
2774 * Objects with size >= 64KB are allocated in the large object space.
2775 * They are currently kept track of with a linked list.
2776 * They don't move, so there is no need to pin them during collection
2777 * and we avoid the memcpy overhead.
/*
 * alloc_large_inner:
 * Allocate a large object: possibly trigger a major collection first when
 * LOS usage crossed the next_los_collection threshold, then mmap a
 * page-rounded LOSObject (header + payload), install the vtable and link
 * it into los_object_list.  noinline keeps this cold path out of the
 * inlined fast allocator.  NOTE(review): several lines (declarations,
 * alloc_size initialization, the store through vtslot, the return) are
 * elided in this excerpt.
 */
2779 static void* __attribute__((noinline))
2780 alloc_large_inner (MonoVTable *vtable, size_t size)
2785 int just_did_major_gc = FALSE;
2787 if (los_memory_usage > next_los_collection) {
2788 DEBUG (4, fprintf (gc_debug_file, "Should trigger major collection: req size %d (los already: %u, limit: %u)\n", size, los_memory_usage, next_los_collection));
2789 just_did_major_gc = TRUE;
2791 major_collection ();
2793 /* later increase based on a percent of the heap size */
2794 next_los_collection = los_memory_usage + 5*1024*1024;
/* map header + payload, rounded up to whole pages */
2797 alloc_size += sizeof (LOSObject);
2798 alloc_size += pagesize - 1;
2799 alloc_size &= ~(pagesize - 1);
2800 /* FIXME: handle OOM */
2801 obj = get_os_memory (alloc_size, TRUE);
/* the object's first word (the vtable slot) is at the start of the data */
2803 vtslot = (void**)obj->data;
2805 total_alloc += alloc_size;
2806 UPDATE_HEAP_BOUNDARIES (obj->data, (char*)obj->data + size);
2807 obj->next = los_object_list;
2808 los_object_list = obj;
2809 los_memory_usage += size;
2811 DEBUG (4, fprintf (gc_debug_file, "Allocated large object %p, vtable: %p (%s), size: %d\n", obj->data, vtable, vtable->klass->name, size));
2815 /* check if we have a suitable fragment in nursery_fragments to be able to allocate
2816 * an object of size @size
2817 * Return FALSE if not found (which means we need a collection)
/*
 * search_fragment_for_size:
 * Find a nursery fragment large enough to hold @size bytes (first fit).
 * On success the fragment is unlinked, becomes the current allocation
 * window (nursery_next/nursery_frag_real_end/nursery_temp_end) and its
 * descriptor is returned to fragment_freelist.
 * Returns FALSE when no fragment fits, meaning a collection is needed.
 */
2820 search_fragment_for_size (size_t size)
2822 Fragment *frag, *prev;
2823 DEBUG (4, fprintf (gc_debug_file, "Searching nursery fragment %p, size: %d\n", nursery_frag_real_end, size));
2825 for (frag = nursery_fragments; frag; frag = frag->next) {
2826 if (size <= (frag->fragment_end - frag->fragment_start)) {
2827 /* remove from the list */
2829 prev->next = frag->next;
2831 nursery_fragments = frag->next;
2832 nursery_next = frag->fragment_start;
2833 nursery_frag_real_end = frag->fragment_end;
/* cap the local window so we get a scan-start hook every SCAN_START_SIZE */
2834 nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + size + SCAN_START_SIZE);
2836 DEBUG (4, fprintf (gc_debug_file, "Using nursery fragment %p-%p, size: %d (req: %d)\n", nursery_next, nursery_frag_real_end, nursery_frag_real_end - nursery_next, size));
2837 frag->next = fragment_freelist;
2838 fragment_freelist = frag;
2847 * size is already rounded up.
/*
 * alloc_degraded:
 * Degraded-mode allocation: place the object directly in a major-heap
 * section instead of the nursery.  Scans existing sections for space,
 * allocating a new section (4x nursery size) when none fits.
 * degraded_mode accumulates the bytes allocated this way; callers use
 * it to decide when to leave degraded mode.
 */
2850 alloc_degraded (MonoVTable *vtable, size_t size)
2852 GCMemSection *section;
2854 for (section = section_list; section; section = section->next) {
2855 if (section != nursery_section && (section->end_data - section->next_data) >= size) {
2856 p = section->next_data;
2861 section = alloc_section (nursery_section->size * 4);
2862 /* FIXME: handle OOM */
2863 p = section->next_data;
2865 section->next_data += size;
2866 degraded_mode += size;
2867 DEBUG (3, fprintf (gc_debug_file, "Allocated (degraded) object %p, vtable: %p (%s), size: %d in section %p\n", p, vtable, vtable->klass->name, size, section));
2873 * Provide a variant that takes just the vtable for small fixed-size objects.
2874 * The aligned size is already computed and stored in vt->gc_descr.
2875 * Note: every SCAN_START_SIZE or so we are given the chance to do some special
2876 * processing. We can keep track of where objects start, for example,
2877 * so when we scan the thread stacks for pinned objects, we can start
2878 * a search for the pinned object in SCAN_START_SIZE chunks.
/*
 * mono_gc_alloc_obj:
 * Main object allocator.  Fast path: bump-pointer allocation from the
 * current nursery window (nursery_next..nursery_temp_end).  Slow paths,
 * taken when the bump crosses nursery_temp_end:
 *   - size > MAX_SMALL_OBJ_SIZE: route to the large-object space;
 *   - current fragment exhausted: find another fragment, or collect /
 *     expand, falling back to degraded allocation while degraded_mode
 *     is active;
 *   - only the local window ended: record a scan start and extend the
 *     window by SCAN_START_SIZE.
 * Memory handed out is already zeroed (see file header contract).
 */
2881 mono_gc_alloc_obj (MonoVTable *vtable, size_t size)
2883 /* FIXME: handle OOM */
/* round the request up to ALLOC_ALIGN (power of two assumed) */
2886 size += ALLOC_ALIGN - 1;
2887 size &= ~(ALLOC_ALIGN - 1);
2889 g_assert (vtable->gc_descr);
2892 p = (void**)nursery_next;
2893 /* FIXME: handle overflow */
2894 nursery_next += size;
2895 if (nursery_next >= nursery_temp_end) {
2896 /* there are two cases: the object is too big or we need to collect */
2897 /* there can be another case (from ORP), if we cooperate with the runtime a bit:
2898 * objects that need finalizers can have the high bit set in their size
2899 * so the above check fails and we can readily add the object to the queue.
2900 * This avoids taking again the GC lock when registering, but this is moot when
2901 * doing thread-local allocation, so it may not be a good idea.
2903 if (size > MAX_SMALL_OBJ_SIZE) {
2904 /* get ready for possible collection */
2905 update_current_thread_stack (&dummy);
/* undo the speculative bump before leaving the nursery path */
2906 nursery_next -= size;
2907 p = alloc_large_inner (vtable, size);
2909 if (nursery_next >= nursery_frag_real_end) {
2910 nursery_next -= size;
2911 /* when running in degraded mode, we continue allocing that way
2912 * for a while, to decrease the number of useless nursery collections.
2914 if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE) {
2915 p = alloc_degraded (vtable, size);
2919 if (!search_fragment_for_size (size)) {
2920 /* get ready for possible collection */
2921 update_current_thread_stack (&dummy);
2922 minor_collect_or_expand_inner (size);
2923 if (degraded_mode) {
2924 p = alloc_degraded (vtable, size);
2929 /* nursery_next changed by minor_collect_or_expand_inner () */
2930 p = (void*)nursery_next;
2931 nursery_next += size;
2932 if (nursery_next > nursery_temp_end) {
2937 /* record the scan start so we can find pinned objects more easily */
2938 nursery_section->scan_starts [((char*)p - (char*)nursery_section->data)/SCAN_START_SIZE] = p;
2939 /* we just bump nursery_temp_end as well */
2940 nursery_temp_end = MIN (nursery_frag_real_end, nursery_next + SCAN_START_SIZE);
2941 DEBUG (5, fprintf (gc_debug_file, "Expanding local alloc: %p-%p\n", nursery_next, nursery_temp_end));
2945 DEBUG (6, fprintf (gc_debug_file, "Allocated object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size));
2954 * To be used for interned strings and possibly MonoThread, reflection handles.
2955 * We may want to explicitly free these objects.
/*
 * mono_gc_alloc_pinned_obj:
 * Allocate an object that will never move (interned strings, handles).
 * Small requests come from the pinned free lists and are explicitly
 * zeroed (unlike nursery memory, freelist chunks may be recycled);
 * requests over MAX_FREELIST_SIZE go to the large object space, which
 * is pinned by construction.
 */
2958 mono_gc_alloc_pinned_obj (MonoVTable *vtable, size_t size)
2960 /* FIXME: handle OOM */
2962 size += ALLOC_ALIGN - 1;
2963 size &= ~(ALLOC_ALIGN - 1);
2965 if (size > MAX_FREELIST_SIZE) {
2966 update_current_thread_stack (&p);
2967 /* large objects are always pinned anyway */
2968 p = alloc_large_inner (vtable, size);
2970 p = alloc_from_freelist (size);
/* freelist memory is not pre-zeroed: clear it to honor the zeroed-alloc contract */
2971 memset (p, 0, size);
2973 DEBUG (6, fprintf (gc_debug_file, "Allocated pinned object %p, vtable: %p (%s), size: %d\n", p, vtable, vtable->klass->name, size));
2980 * ######################################################################
2981 * ######## Finalization support
2982 * ######################################################################
2986 * this is valid for the nursery: if the object has been forwarded it means it's
2987 * still referenced from a root. If it is pinned it's still alive as well.
2988 * Return TRUE if @obj is ready to be finalized.
/* An object is finalize-ready when the collection neither pinned nor
 * forwarded it, i.e. nothing kept it alive.  Valid for nursery objects. */
2990 #define object_is_fin_ready(obj) (!object_is_pinned (obj) && !object_is_forwarded (obj))
/*
 * finalize_in_range:
 * Walk the registered-finalizer hash and, for entries whose object lies
 * in [start, end): move dead objects to fin_ready_list (copying them so
 * they survive until their finalizer runs) and update the pointers of
 * survivors that were moved by the collection.
 * NOTE(review): list unlinking relies on elided prev-tracking lines.
 */
2993 finalize_in_range (void **start, void **end)
2995 FinalizeEntry *entry, *prev;
2999 for (i = 0; i < finalizable_hash_size; ++i) {
3001 for (entry = finalizable_hash [i]; entry;) {
3002 if (entry->object >= start && entry->object < end) {
3003 if (object_is_fin_ready (entry->object)) {
3005 FinalizeEntry *next;
3006 /* remove and put in fin_ready_list */
3008 prev->next = entry->next;
3010 finalizable_hash [i] = entry->next;
3012 num_ready_finalizers++;
3013 num_registered_finalizers--;
3014 entry->next = fin_ready_list;
3015 fin_ready_list = entry;
3016 /* Make it survive */
3017 from = entry->object;
3018 entry->object = copy_object (entry->object, start, end);
3019 DEBUG (5, fprintf (gc_debug_file, "Queueing object for finalization: %p (%s) (was at %p) (%d/%d)\n", entry->object, safe_name (entry->object), from, num_ready_finalizers, num_registered_finalizers));
3023 /* update pointer */
3024 DEBUG (5, fprintf (gc_debug_file, "Updating object for finalization: %p (%s)\n", entry->object, safe_name (entry->object)));
3025 entry->object = copy_object (entry->object, start, end);
3029 entry = entry->next;
/*
 * null_link_in_range:
 * Process disappearing links (weak references) whose target lies in
 * [start, end): if the target died, null out the link location and
 * free the entry; otherwise update both the entry and the link with
 * the object's possibly-moved address.
 */
3035 null_link_in_range (void **start, void **end)
3037 FinalizeEntry *entry, *prev;
3039 for (i = 0; i < disappearing_link_hash_size; ++i) {
3041 for (entry = disappearing_link_hash [i]; entry;) {
3042 if (entry->object >= start && entry->object < end) {
3043 if (object_is_fin_ready (entry->object)) {
3044 void **p = entry->data;
3047 /* remove from list */
3049 prev->next = entry->next;
3051 disappearing_link_hash [i] = entry->next;
3052 DEBUG (5, fprintf (gc_debug_file, "Dislink nullified at %p to GCed object %p\n", p, entry->object));
3054 free_internal_mem (entry);
3059 /* update pointer if it's moved
3060 * FIXME: what if an object is moved earlier?
3062 entry->object = copy_object (entry->object, start, end);
3063 DEBUG (5, fprintf (gc_debug_file, "Updated dislink at %p to %p\n", entry->data, entry->object));
/* write the (possibly new) address back through the user's link location */
3065 *link = entry->object;
3069 entry = entry->next;
3075 * mono_gc_finalizers_for_domain:
3076 * @domain: the unloading appdomain
3077 * @out_array: output array
3078 * @out_size: size of output array
3080 * Store inside @out_array up to @out_size objects that belong to the unloading
3081 * appdomain @domain. Returns the number of stored items. Can be called repeatedly
3082 * until it returns 0.
3083 * The items are removed from the finalizer data structure, so the caller is supposed
3085 * @out_array should be on the stack to allow the GC to know the objects are still alive.
/*
 * mono_gc_finalizers_for_domain:
 * Collect up to @out_size finalizable objects that belong to the given
 * (unloading) domain into @out_array, unlinking each from the finalizer
 * hash.  Returns the number stored; 0 when none remain.  @out_array is
 * expected to live on the caller's stack so the objects stay pinned.
 */
3088 mono_gc_finalizers_for_domain (MonoDomain *domain, MonoObject **out_array, int out_size)
3090 FinalizeEntry *entry, *prev;
3092 if (no_finalize || !out_size || !out_array)
3096 for (i = 0; i < finalizable_hash_size; ++i) {
3098 for (entry = finalizable_hash [i]; entry;) {
3099 if (mono_object_domain (entry->object) == domain) {
3100 FinalizeEntry *next;
3101 /* remove and put in out_array */
3103 prev->next = entry->next;
3105 finalizable_hash [i] = entry->next;
3107 num_registered_finalizers--;
3108 out_array [count ++] = entry->object;
3109 DEBUG (5, fprintf (gc_debug_file, "Collecting object for finalization: %p (%s) (%d/%d)\n", entry->object, safe_name (entry->object), num_ready_finalizers, num_registered_finalizers));
/* stop early once the caller's array is full; caller re-invokes for the rest */
3111 if (count == out_size) {
3118 entry = entry->next;
/*
 * rehash_fin_table:
 * Grow the finalizable-object hash to a prime near the current entry
 * count and redistribute every entry by object hash.
 */
3126 rehash_fin_table (void)
3130 FinalizeEntry **new_hash;
3131 FinalizeEntry *entry, *next;
3132 int new_size = g_spaced_primes_closest (num_registered_finalizers);
3134 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3135 for (i = 0; i < finalizable_hash_size; ++i) {
3136 for (entry = finalizable_hash [i]; entry; entry = next) {
3137 hash = mono_object_hash (entry->object) % new_size;
3139 entry->next = new_hash [hash];
3140 new_hash [hash] = entry;
3143 free_internal_mem (finalizable_hash);
3144 finalizable_hash = new_hash;
3145 finalizable_hash_size = new_size;
/*
 * mono_gc_register_for_finalization:
 * Register @obj for finalization with callback data @user_data, or
 * deregister it when the entry is being removed (see the in-loop
 * removal branch).  An existing entry for @obj is updated in place.
 * Rehashes first when the table is at 2x load.
 */
3149 mono_gc_register_for_finalization (MonoObject *obj, void *user_data)
3151 FinalizeEntry *entry, *prev;
3155 hash = mono_object_hash (obj);
3157 if (num_registered_finalizers >= finalizable_hash_size * 2)
3158 rehash_fin_table ();
3159 hash %= finalizable_hash_size;
3161 for (entry = finalizable_hash [hash]; entry; entry = entry->next) {
3162 if (entry->object == obj) {
3164 entry->data = user_data;
3166 /* remove from the list */
3168 prev->next = entry->next;
3170 finalizable_hash [hash] = entry->next;
3171 num_registered_finalizers--;
3172 DEBUG (5, fprintf (gc_debug_file, "Removed finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
3173 free_internal_mem (entry);
3181 /* request to deregister, but already out of the list */
/* not found: allocate and link a fresh entry at the bucket head */
3185 entry = get_internal_mem (sizeof (FinalizeEntry));
3186 entry->object = obj;
3187 entry->data = user_data;
3188 entry->next = finalizable_hash [hash];
3189 finalizable_hash [hash] = entry;
3190 num_registered_finalizers++;
3191 DEBUG (5, fprintf (gc_debug_file, "Added finalizer %p for object: %p (%s) (%d)\n", entry, obj, obj->vtable->klass->name, num_registered_finalizers));
/*
 * rehash_dislink:
 * Grow the disappearing-link hash to a prime near the current link
 * count; entries are rehashed by the link address (entry->data).
 */
3196 rehash_dislink (void)
3200 FinalizeEntry **new_hash;
3201 FinalizeEntry *entry, *next;
3202 int new_size = g_spaced_primes_closest (num_disappearing_links);
3204 new_hash = get_internal_mem (new_size * sizeof (FinalizeEntry*));
3205 for (i = 0; i < disappearing_link_hash_size; ++i) {
3206 for (entry = disappearing_link_hash [i]; entry; entry = next) {
3207 hash = mono_aligned_addr_hash (entry->data) % new_size;
3209 entry->next = new_hash [hash];
3210 new_hash [hash] = entry;
3213 free_internal_mem (disappearing_link_hash);
3214 disappearing_link_hash = new_hash;
3215 disappearing_link_hash_size = new_size;
/*
 * mono_gc_register_disappearing_link:
 * Register (or update) a weak link at address @link targeting @obj.
 * A NULL @obj removes the existing registration.  The table is keyed
 * by the link address, so re-registering the same location just swaps
 * the target object.
 */
3219 mono_gc_register_disappearing_link (MonoObject *obj, void *link)
3221 FinalizeEntry *entry, *prev;
3225 if (num_disappearing_links >= disappearing_link_hash_size * 2)
3227 /* FIXME: add check that link is not in the heap */
3228 hash = mono_aligned_addr_hash (link) % disappearing_link_hash_size;
3229 entry = disappearing_link_hash [hash];
3231 for (; entry; entry = entry->next) {
3232 /* link already added */
3233 if (link == entry->data) {
3234 /* NULL obj means remove */
3237 prev->next = entry->next;
3239 disappearing_link_hash [hash] = entry->next;
3240 num_disappearing_links--;
3241 DEBUG (5, fprintf (gc_debug_file, "Removed dislink %p (%d)\n", entry, num_disappearing_links));
3242 free_internal_mem (entry);
3244 entry->object = obj; /* we allow the change of object */
3251 entry = get_internal_mem (sizeof (FinalizeEntry));
3252 entry->object = obj;
3254 entry->next = disappearing_link_hash [hash];
3255 disappearing_link_hash [hash] = entry;
3256 num_disappearing_links++;
3257 DEBUG (5, fprintf (gc_debug_file, "Added dislink %p for object: %p (%s) at %p\n", entry, obj, obj->vtable->klass->name, link));
/*
 * mono_gc_invoke_finalizers:
 * Drain fin_ready_list, invoking each entry's finalizer callback with
 * the object.  The object reference is held in a local, so it sits on
 * the stack and is conservatively pinned while the callback runs.
 */
3262 mono_gc_invoke_finalizers (void)
3264 FinalizeEntry *entry;
3267 /* FIXME: batch to reduce lock contention */
3268 while (fin_ready_list) {
3270 entry = fin_ready_list;
3272 fin_ready_list = entry->next;
3273 num_ready_finalizers--;
3274 obj = entry->object;
3275 DEBUG (7, fprintf (gc_debug_file, "Finalizing object %p (%s)\n", obj, safe_name (obj)));
3279 void (*callback)(void *, void*) = entry->data;
3281 obj = entry->object;
3283 /* the object is on the stack so it is pinned */
3284 /*g_print ("Calling finalizer for object: %p (%s)\n", entry->object, safe_name (entry->object));*/
3285 callback (obj, NULL);
3286 free_internal_mem (entry);
/*
 * mono_gc_pending_finalizers:
 * TRUE when objects are queued and waiting for their finalizers to run.
 */
3293 mono_gc_pending_finalizers (void)
3295 return fin_ready_list != NULL;
3298 /* Negative value to remove */
/*
 * mono_gc_add_memory_pressure:
 * Adjust the externally-reported memory pressure; @value may be
 * negative to remove previously added pressure.  Not atomic (see FIXME).
 */
3300 mono_gc_add_memory_pressure (gint64 value)
3302 /* FIXME: Use interlocked functions */
3304 memory_pressure += value;
3309 * ######################################################################
3310 * ######## registered roots support
3311 * ######################################################################
/* Body of the roots-hash rehash routine (its signature is elided from
 * this extract): grow to a prime near num_roots_entries and rehash
 * every RootRecord by its start address. */
3319 RootRecord **new_hash;
3320 RootRecord *entry, *next;
3321 int new_size = g_spaced_primes_closest (num_roots_entries);
3323 new_hash = get_internal_mem (new_size * sizeof (RootRecord*));
3324 for (i = 0; i < roots_hash_size; ++i) {
3325 for (entry = roots_hash [i]; entry; entry = next) {
3326 hash = mono_aligned_addr_hash (entry->start_root) % new_size;
3328 entry->next = new_hash [hash];
3329 new_hash [hash] = entry;
3332 free_internal_mem (roots_hash);
3333 roots_hash = new_hash;
3334 roots_hash_size = new_size;
3338 * We do not coalesce roots.
/*
 * mono_gc_register_root:
 * Register [start, start+size) as a GC root with pinning/precise
 * semantics described by @descr.  Re-registering an existing start
 * address updates its size and descriptor in place (used for thread
 * statics); otherwise a new RootRecord is linked into the hash.
 */
3341 mono_gc_register_root (char *start, size_t size, void *descr)
3343 RootRecord *new_root;
3344 unsigned int hash = mono_aligned_addr_hash (start);
3346 if (num_roots_entries >= roots_hash_size * 2)
3348 hash %= roots_hash_size;
3349 for (new_root = roots_hash [hash]; new_root; new_root = new_root->next) {
3350 /* we allow changing the size and the descriptor (for thread statics etc) */
3351 if (new_root->start_root == start) {
3352 size_t old_size = new_root->end_root - new_root->start_root;
3353 new_root->end_root = new_root->start_root + size;
3354 new_root->root_desc = (mword)descr;
3356 roots_size -= old_size;
3361 new_root = get_internal_mem (sizeof (RootRecord));
3363 new_root->start_root = start;
3364 new_root->end_root = new_root->start_root + size;
3365 new_root->root_desc = (mword)descr;
3367 num_roots_entries++;
3368 new_root->next = roots_hash [hash];
3369 roots_hash [hash] = new_root;
3370 DEBUG (3, fprintf (gc_debug_file, "Added root %p for range: %p-%p, descr: %p (%d/%d bytes)\n", new_root, new_root->start_root, new_root->end_root, descr, (int)size, (int)roots_size));
/*
 * mono_gc_deregister_root:
 * Remove the root record whose range starts at @addr, unlinking it
 * from the hash bucket and updating roots_size/num_roots_entries.
 */
3380 mono_gc_deregister_root (char* addr)
3382 RootRecord *tmp, *prev = NULL;
3383 unsigned int hash = mono_aligned_addr_hash (addr);
3385 hash %= roots_hash_size;
3386 tmp = roots_hash [hash];
3388 if (tmp->start_root == (char*)addr) {
3390 prev->next = tmp->next;
3392 roots_hash [hash] = tmp->next;
3393 roots_size -= (tmp->end_root - tmp->start_root);
3394 num_roots_entries--;
3395 DEBUG (3, fprintf (gc_debug_file, "Removed root %p for range: %p-%p\n", tmp, tmp->start_root, tmp->end_root));
3396 free_internal_mem (tmp);
3406 * ######################################################################
3407 * ######## Thread handling (stop/start code)
3408 * ######################################################################
/* Undo the wrapper macros so the real pthread entry points are callable
 * from the mono_gc_pthread_* wrappers below. */
3411 #undef pthread_create
3413 #undef pthread_detach
/* Start-info handed to gc_start_thread so the new thread can register
 * itself with the GC before running the user's start routine. */
3416 void *(*start_routine) (void *);
3420 } SgenThreadStartInfo;
3422 /* eventually share with MonoThread? */
3423 typedef struct _SgenThreadInfo SgenThreadInfo;
/* Per-thread GC bookkeeping, chained in a small open hash (thread_table). */
3425 struct _SgenThreadInfo {
3426 SgenThreadInfo *next;
3428 unsigned int stop_count; /* to catch duplicate signals */
3433 RememberedSet **remset;
3436 /* FIXME: handle large/small config */
3437 #define THREAD_HASH_SIZE 11
/* NOTE(review): casting pthread_t to unsigned int is not portable where
 * pthread_t is not an integral type -- confirm on target platforms. */
3438 #define HASH_PTHREAD_T(id) (((unsigned int)(id) >> 4) * 2654435761u)
3440 static SgenThreadInfo* thread_table [THREAD_HASH_SIZE];
/* posted by suspended/restarted threads so the collector can count acks */
3441 static sem_t suspend_ack_semaphore;
3442 static unsigned int global_stop_count = 0;
/* signals used for the stop/restart handshake */
3443 static int suspend_signal_num = SIGPWR;
3444 static int restart_signal_num = SIGXCPU;
3445 static sigset_t suspend_signal_mask;
/* scratch buffer where the current thread dumps its registers for scanning */
3446 static mword cur_thread_regs [ARCH_NUM_REGS] = {0};
3448 /* LOCKING: assumes the GC lock is held */
/* thread_info_lookup: find the SgenThreadInfo for pthread @id by walking
 * its hash bucket (pthread_equal comparison). */
3449 static SgenThreadInfo*
3450 thread_info_lookup (pthread_t id)
3452 unsigned int hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3453 SgenThreadInfo *info;
3455 info = thread_table [hash];
3456 while (info && !pthread_equal (info->id, id)) {
/*
 * update_current_thread_stack:
 * Snapshot the current thread's registers into cur_thread_regs and
 * record an (aligned) stack-start so a collection triggered from this
 * point scans the right stack range.
 */
3463 update_current_thread_stack (void *start)
3465 void *ptr = cur_thread_regs;
3466 SgenThreadInfo *info = thread_info_lookup (pthread_self ());
3467 info->stack_start = align_pointer (&ptr);
3468 ARCH_STORE_REGS (ptr);
/* signal_desc: human-readable name of a handshake signal, for debug logs. */
3472 signal_desc (int signum)
3474 if (signum == suspend_signal_num)
3476 if (signum == restart_signal_num)
3481 /* LOCKING: assumes the GC lock is held */
/*
 * thread_handshake:
 * Send @signum (suspend or restart) to every registered thread except
 * the caller, then wait on suspend_ack_semaphore for one ack per signal
 * delivered.  Returns the number of threads signalled.
 */
3483 thread_handshake (int signum)
3485 int count, i, result;
3486 SgenThreadInfo *info;
3487 pthread_t me = pthread_self ();
3490 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3491 for (info = thread_table [i]; info; info = info->next) {
3492 DEBUG (4, fprintf (gc_debug_file, "considering thread %p for signal %d (%s)\n", info, signum, signal_desc (signum)));
3493 if (pthread_equal (info->id, me)) {
3494 DEBUG (4, fprintf (gc_debug_file, "Skip (equal): %p, %p\n", (void*)me, (void*)info->id));
3497 /*if (signum == suspend_signal_num && info->stop_count == global_stop_count)
3499 result = pthread_kill (info->id, signum);
3501 DEBUG (4, fprintf (gc_debug_file, "thread %p signal sent\n", info));
3504 DEBUG (4, fprintf (gc_debug_file, "thread %p signal failed: %d (%s)\n", (void*)info->id, result, strerror (result)));
/* collect one semaphore ack per successfully signalled thread;
 * retry sem_wait on EINTR */
3510 for (i = 0; i < count; ++i) {
3511 while ((result = sem_wait (&suspend_ack_semaphore)) != 0) {
3512 if (errno != EINTR) {
3513 g_error ("sem_wait ()");
3520 /* LOCKING: assumes the GC lock is held (by the stopping thread) */
/*
 * suspend_handler:
 * Signal handler run in each mutator thread on the suspend signal.
 * Publishes the thread's remset and stack-start, acks via the
 * suspend_ack_semaphore, then sigsuspend()s until the restart signal
 * arrives, acking again on wakeup.  errno is preserved around the
 * handler (old_errno), as required for async-signal safety.
 */
3522 suspend_handler (int sig)
3524 SgenThreadInfo *info;
3527 int old_errno = errno;
3529 id = pthread_self ();
3530 info = thread_info_lookup (id);
3531 stop_count = global_stop_count;
3532 /* duplicate signal */
/* NOTE(review): duplicate-signal check is deliberately disabled (if (0 && ...)) */
3533 if (0 && info->stop_count == stop_count) {
3537 /* update the remset info in the thread data structure */
3538 info->remset = remembered_set;
3540 * this includes the register values that the kernel put on the stack.
3541 * Write arch-specific code to only push integer regs and a more accurate
/* the handler frame's address doubles as the conservative stack top */
3544 info->stack_start = align_pointer (&id);
3546 /* notify the waiting thread */
3547 sem_post (&suspend_ack_semaphore);
3548 info->stop_count = stop_count;
3550 /* wait until we receive the restart signal */
3553 sigsuspend (&suspend_signal_mask);
3554 } while (info->signal != restart_signal_num);
3556 /* notify the waiting thread */
3557 sem_post (&suspend_ack_semaphore);
/* restart_handler: marks the thread as restarted so the sigsuspend loop
 * in suspend_handler can exit; preserves errno. */
3563 restart_handler (int sig)
3565 SgenThreadInfo *info;
3566 int old_errno = errno;
3568 info = thread_info_lookup (pthread_self ());
3569 info->signal = restart_signal_num;
3574 static TV_DECLARE (stop_world_time);
3575 static unsigned long max_pause_usec = 0;
3577 /* LOCKING: assumes the GC lock is held */
/* stop_world (signature elided in this extract): bump the global stop
 * count, timestamp the pause start, and suspend all other threads. */
3583 global_stop_count++;
3584 DEBUG (3, fprintf (gc_debug_file, "stopping world n %d from %p %p\n", global_stop_count, thread_info_lookup (pthread_self ()), (gpointer)pthread_self ()));
3585 TV_GETTIME (stop_world_time);
3586 count = thread_handshake (suspend_signal_num);
3587 DEBUG (3, fprintf (gc_debug_file, "world stopped %d thread(s)\n", count));
3591 /* LOCKING: assumes the GC lock is held */
/* restart_world: resume all suspended threads and record the pause time
 * (tracking the maximum observed pause in max_pause_usec). */
3593 restart_world (void)
3596 TV_DECLARE (end_sw);
3599 count = thread_handshake (restart_signal_num);
3600 TV_GETTIME (end_sw);
3601 usec = TV_ELAPSED (stop_world_time, end_sw);
3602 max_pause_usec = MAX (usec, max_pause_usec);
3603 DEBUG (2, fprintf (gc_debug_file, "restarted %d thread(s) (pause time: %d usec, max: %d)\n", count, (int)usec, (int)max_pause_usec));
3608 * Identify objects pinned in a thread stack and its registers.
/* pin_thread_data: conservatively scan every registered thread's stack
 * (and the current thread's saved registers) for pointers into
 * [start_nursery, end_nursery), pinning what they hit. */
3611 pin_thread_data (void *start_nursery, void *end_nursery)
3614 SgenThreadInfo *info;
3616 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3617 for (info = thread_table [i]; info; info = info->next) {
3619 DEBUG (2, fprintf (gc_debug_file, "Skipping dead thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3622 DEBUG (2, fprintf (gc_debug_file, "Scanning thread %p, range: %p-%p, size: %d\n", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start));
3623 conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery);
3626 DEBUG (2, fprintf (gc_debug_file, "Scanning current thread registers\n"));
3627 conservatively_pin_objects_from (cur_thread_regs, cur_thread_regs + ARCH_NUM_REGS, start_nursery, end_nursery);
/* find_pinning_ref_from_thread: debugging aid -- report which thread
 * stack slot holds a reference into [obj, obj+size). */
3631 find_pinning_ref_from_thread (char *obj, size_t size)
3634 SgenThreadInfo *info;
3635 char *endobj = obj + size;
3637 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3638 for (info = thread_table [i]; info; info = info->next) {
3639 char **start = (char**)info->stack_start;
3642 while (start < (char**)info->stack_end) {
3643 if (*start >= obj && *start < endobj) {
3644 DEBUG (0, fprintf (gc_debug_file, "Object %p referenced in thread %p (id %p) at %p, stack: %p-%p\n", obj, info, info->id, start, info->stack_start, info->stack_end));
3650 /* FIXME: check register */
3653 /* return TRUE if ptr points inside the managed heap */
/* ptr_in_heap: cheap range test against the recorded heap boundaries;
 * see FIXME -- this can give false positives within the range. */
3655 ptr_in_heap (void* ptr)
3657 mword p = (mword)ptr;
3658 if (p < lowest_heap_address || p >= highest_heap_address)
3660 /* FIXME: more checks */
/*
 * handle_remset:
 * Process one remembered-set entry at *p.  The low bits of the word
 * select the entry kind (REMSET_TYPE_MASK): a single location, a range
 * of @count locations, or a whole object to re-scan.  Nursery-pointing
 * slots outside the nursery get their referents copied; when scanning a
 * per-thread set (!global) slots that still point into the nursery are
 * promoted to the global remset.  Returns the address past the entry.
 */
3665 handle_remset (mword *p, void *start_nursery, void *end_nursery, gboolean global)
3670 /* FIXME: exclude stack locations */
3671 switch ((*p) & REMSET_TYPE_MASK) {
3672 case REMSET_LOCATION:
3674 if ((ptr < start_nursery || ptr >= end_nursery) && ptr_in_heap (ptr)) {
3675 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3676 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p\n", ptr, *ptr));
3677 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3678 add_to_global_remset (ptr);
3680 DEBUG (9, fprintf (gc_debug_file, "Skipping remset at %p holding %p\n", ptr, *ptr));
/* range entry: strip the tag bits to recover the base slot address */
3684 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
3685 if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr))
3688 while (count-- > 0) {
3689 *ptr = copy_object (*ptr, start_nursery, end_nursery);
3690 DEBUG (9, fprintf (gc_debug_file, "Overwrote remset at %p with %p (count: %d)\n", ptr, *ptr, (int)count));
3691 if (!global && *ptr >= start_nursery && *ptr < end_nursery)
3692 add_to_global_remset (ptr);
/* object entry: re-scan the whole object for nursery references */
3697 ptr = (void**)(*p & ~REMSET_TYPE_MASK);
3698 if ((ptr >= start_nursery && ptr < end_nursery) || !ptr_in_heap (ptr))
3700 scan_object (*ptr, start_nursery, end_nursery);
3703 g_assert_not_reached ();
/*
 * scan_from_remsets:
 * Minor-collection root scan over the remembered sets: first the global
 * remset, then each thread's chain.  Per-thread sets are reset after
 * processing and all but the head buffer of each chain are freed.
 */
3709 scan_from_remsets (void *start_nursery, void *end_nursery)
3712 SgenThreadInfo *info;
3713 RememberedSet *remset, *next;
3716 /* the global one */
3717 for (remset = global_remset; remset; remset = remset->next) {
3718 DEBUG (4, fprintf (gc_debug_file, "Scanning global remset range: %p-%p, size: %d\n", remset->data, remset->store_next, remset->store_next - remset->data));
3719 for (p = remset->data; p < remset->store_next;) {
3720 p = handle_remset (p, start_nursery, end_nursery, TRUE);
3723 /* the per-thread ones */
3724 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3725 for (info = thread_table [i]; info; info = info->next) {
3726 for (remset = info->remset; remset; remset = next) {
3727 DEBUG (4, fprintf (gc_debug_file, "Scanning remset for thread %p, range: %p-%p, size: %d\n", info, remset->data, remset->store_next, remset->store_next - remset->data));
3728 for (p = remset->data; p < remset->store_next;) {
3729 p = handle_remset (p, start_nursery, end_nursery, FALSE);
3731 remset->store_next = remset->data;
3732 next = remset->next;
3733 remset->next = NULL;
/* keep the thread's head buffer, free the overflow buffers */
3734 if (remset != info->remset) {
3735 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
3736 free_internal_mem (remset);
3744 * Clear the info in the remembered sets: we're doing a major collection, so
3745 * the per-thread ones are not needed and the global ones will be reconstructed
/* clear_remsets: empty every remembered set (major collection rebuilds
 * the global one); keep only the head buffer of each chain. */
3749 clear_remsets (void)
3752 SgenThreadInfo *info;
3753 RememberedSet *remset, *next;
3755 /* the global list */
3756 for (remset = global_remset; remset; remset = next) {
3757 remset->store_next = remset->data;
3758 next = remset->next;
3759 remset->next = NULL;
3760 if (remset != global_remset) {
3761 DEBUG (4, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
3762 free_internal_mem (remset);
3765 /* the per-thread ones */
3766 for (i = 0; i < THREAD_HASH_SIZE; ++i) {
3767 for (info = thread_table [i]; info; info = info->next) {
3768 for (remset = info->remset; remset; remset = next) {
3769 remset->store_next = remset->data;
3770 next = remset->next;
3771 remset->next = NULL;
3772 if (remset != info->remset) {
3773 DEBUG (1, fprintf (gc_debug_file, "Freed remset at %p\n", remset->data));
3774 free_internal_mem (remset);
3781 /* LOCKING: assumes the GC lock is held */
/*
 * gc_register_current_thread:
 * Create and hash an SgenThreadInfo for the calling thread.  The stack
 * end is found via pthread attributes where available, otherwise
 * approximated by rounding @addr up to a 4K boundary (stack assumed to
 * grow down).  Also installs the thread's initial remembered set.
 */
3782 static SgenThreadInfo*
3783 gc_register_current_thread (void *addr)
3786 SgenThreadInfo* info = malloc (sizeof (SgenThreadInfo));
3789 info->id = pthread_self ();
/* -1 so the first global_stop_count (0 after ++) never matches */
3790 info->stop_count = -1;
3793 info->stack_start = NULL;
3795 /* try to get it with attributes first */
3796 #if defined(HAVE_PTHREAD_GETATTR_NP) && defined(HAVE_PTHREAD_ATTR_GETSTACK)
3800 pthread_attr_t attr;
3801 pthread_getattr_np (pthread_self (), &attr);
3802 pthread_attr_getstack (&attr, &sstart, &size);
3803 info->stack_end = (char*)sstart + size;
3805 #elif defined(HAVE_PTHREAD_GET_STACKSIZE_NP) && defined(HAVE_PTHREAD_GET_STACKADDR_NP)
3806 info->stack_end = (char*)pthread_get_stackaddr_np (pthread_self ());
3809 /* FIXME: we assume the stack grows down */
3810 gsize stack_bottom = (gsize)addr;
3811 stack_bottom += 4095;
3812 stack_bottom &= ~4095;
3813 info->stack_end = (char*)stack_bottom;
3817 /* hash into the table */
3818 hash = HASH_PTHREAD_T (info->id) % THREAD_HASH_SIZE;
3819 info->next = thread_table [hash];
3820 thread_table [hash] = info;
3822 remembered_set = info->remset = alloc_remset (DEFAULT_REMSET_SIZE, info);
3823 DEBUG (3, fprintf (gc_debug_file, "registered thread %p (%p) (hash: %d)\n", info, (gpointer)info->id, hash));
/* unregister_current_thread: unlink the calling thread's info from the
 * hash; its remsets are not yet transferred (see FIXME). */
3828 unregister_current_thread (void)
3831 SgenThreadInfo *prev = NULL;
3833 pthread_t id = pthread_self ();
3835 hash = HASH_PTHREAD_T (id) % THREAD_HASH_SIZE;
3836 p = thread_table [hash];
3838 DEBUG (3, fprintf (gc_debug_file, "unregister thread %p (%p)\n", p, (gpointer)p->id));
3839 while (!pthread_equal (p->id, id)) {
3844 thread_table [hash] = p->next;
3846 prev->next = p->next;
3848 /* FIXME: transfer remsets if any */
/* gc_start_thread: trampoline run in the new thread -- registers with
 * the GC, signals the creator via the semaphore, runs the user start
 * routine, then unregisters on the way out. */
3853 gc_start_thread (void *arg)
3855 SgenThreadStartInfo *start_info = arg;
3856 SgenThreadInfo* info;
3857 void *t_arg = start_info->arg;
3858 void *(*start_func) (void*) = start_info->start_routine;
3862 info = gc_register_current_thread (&result);
3864 sem_post (&(start_info->registered));
3865 result = start_func (t_arg);
3867 unregister_current_thread ();
/* mono_gc_pthread_create: pthread_create wrapper that routes the new
 * thread through gc_start_thread and waits until it has registered. */
3874 mono_gc_pthread_create (pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg)
3876 SgenThreadStartInfo *start_info;
3879 start_info = malloc (sizeof (SgenThreadStartInfo));
3882 sem_init (&(start_info->registered), 0, 0);
3883 start_info->arg = arg;
3884 start_info->start_routine = start_routine;
3886 result = pthread_create (new_thread, attr, gc_start_thread, start_info);
3888 while (sem_wait (&(start_info->registered)) != 0) {
3889 /*if (EINTR != errno) ABORT("sem_wait failed"); */
3892 sem_destroy (&(start_info->registered));
/* thin pass-through wrappers kept for symmetry with the create wrapper */
3898 mono_gc_pthread_join (pthread_t thread, void **retval)
3900 return pthread_join (thread, retval);
3904 mono_gc_pthread_detach (pthread_t thread)
3906 return pthread_detach (thread);
/* mono_gc_register_thread: idempotent registration of an externally
 * created thread; @baseptr approximates the stack bottom. */
3910 mono_gc_register_thread (void *baseptr)
3912 SgenThreadInfo *info;
3914 info = thread_info_lookup (pthread_self ());
3916 info = gc_register_current_thread (baseptr);
3918 return info != NULL;
3922 * ######################################################################
3923 * ######## Write barriers
3924 * ######################################################################
3927 static RememberedSet*
3928 alloc_remset (int size, gpointer id) {
3929 RememberedSet* res = get_internal_mem (sizeof (RememberedSet) + (size * sizeof (gpointer)));
3930 res->store_next = res->data;
3931 res->end_set = res->data + size;
3933 DEBUG (4, fprintf (gc_debug_file, "Allocated remset size %d at %p for %p\n", size, res->data, id));
3938 * Note: the write barriers first do the needed GC work and then do the actual store:
3939 * this way the value is visible to the conservative GC scan after the write barrier
3940 * itself. If a GC interrupts the barrier in the middle, value will be kept alive by
3941 * the conservative scan, otherwise by the remembered set scan. FIXME: figure out what
3942 * happens when we need to record which pointers contain references to the new generation.
3943 * The write barrier will be executed, but the pointer is still not stored.
3946 mono_gc_wbarrier_set_field (MonoObject *obj, gpointer field_ptr, MonoObject* value)
3949 if (field_ptr >= nursery_start && field_ptr < nursery_real_end) {
3950 *(void**)field_ptr = value;
3953 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", field_ptr));
3954 rs = remembered_set;
3955 if (rs->store_next < rs->end_set) {
3956 *(rs->store_next++) = (mword)field_ptr;
3957 *(void**)field_ptr = value;
3960 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
3961 rs->next = remembered_set;
3962 remembered_set = rs;
3963 thread_info_lookup (pthread_self())->remset = rs;
3964 *(rs->store_next++) = (mword)field_ptr;
3965 *(void**)field_ptr = value;
3969 mono_gc_wbarrier_set_arrayref (MonoArray *arr, gpointer slot_ptr, MonoObject* value)
3971 RememberedSet *rs = remembered_set;
3972 if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end) {
3973 *(void**)slot_ptr = value;
3976 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", slot_ptr));
3977 if (rs->store_next < rs->end_set) {
3978 *(rs->store_next++) = (mword)slot_ptr;
3979 *(void**)slot_ptr = value;
3982 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
3983 rs->next = remembered_set;
3984 remembered_set = rs;
3985 thread_info_lookup (pthread_self())->remset = rs;
3986 *(rs->store_next++) = (mword)slot_ptr;
3987 *(void**)slot_ptr = value;
/*
 * Write barrier for a bulk copy of 'count' reference slots starting at
 * 'slot_ptr' in 'arr'. Instead of one remset entry per slot it records a
 * single REMSET_RANGE-tagged entry followed by the count (two words, hence
 * the '+ 1' headroom check). Nursery destinations need no entry.
 * NOTE(review): no element copy is visible in this fragment — presumably the
 * caller performs the actual copy, or it happens on lines dropped by the
 * extraction; confirm against the full source.
 */
3991 mono_gc_wbarrier_arrayref_copy (MonoArray *arr, gpointer slot_ptr, int count)
3993 RememberedSet *rs = remembered_set;
3994 if (slot_ptr >= nursery_start && slot_ptr < nursery_real_end)
3996 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p, %d\n", slot_ptr, count));
/* need room for two words: the tagged address and the count */
3997 if (rs->store_next + 1 < rs->end_set) {
3998 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
3999 *(rs->store_next++) = count;
/* chunk full: start a new chunk, link it in, publish to this thread */
4002 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4003 rs->next = remembered_set;
4004 remembered_set = rs;
4005 thread_info_lookup (pthread_self())->remset = rs;
4006 *(rs->store_next++) = (mword)slot_ptr | REMSET_RANGE;
4007 *(rs->store_next++) = count;
/*
 * Generic write barrier: store 'value' through an arbitrary pointer 'ptr'
 * whose containing object is not known to the caller. Nursery destinations
 * are stored directly; anything else gets a remembered-set entry first.
 * The FIXME below notes that 'ptr' is not validated to be a heap address.
 * NOTE(review): gaps in the leftover original line numbers indicate missing
 * lines (braces/returns) between the fragments shown here.
 */
4011 mono_gc_wbarrier_generic_store (gpointer ptr, MonoObject* value)
4013 RememberedSet *rs = remembered_set;
/* nursery destination: no remset entry needed */
4014 if (ptr >= nursery_start && ptr < nursery_real_end) {
4015 DEBUG (8, fprintf (gc_debug_file, "Skipping remset at %p\n", ptr));
4016 *(void**)ptr = value;
4019 DEBUG (8, fprintf (gc_debug_file, "Adding remset at %p\n", ptr));
4020 /* FIXME: ensure it is on the heap */
4021 if (rs->store_next < rs->end_set) {
4022 *(rs->store_next++) = (mword)ptr;
4023 *(void**)ptr = value;
/* chunk full: allocate a new chunk, link it in, publish to this thread */
4026 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4027 rs->next = remembered_set;
4028 remembered_set = rs;
4029 thread_info_lookup (pthread_self())->remset = rs;
4030 *(rs->store_next++) = (mword)ptr;
4031 *(void**)ptr = value;
/*
 * Write barrier for copying 'count' value-type instances of class 'klass'
 * from 'src' to 'dest'. Only the nursery check and a debug trace are visible
 * in this fragment — the remset recording and the copy itself are on lines
 * dropped by the extraction. NOTE(review): do not infer behavior beyond what
 * is shown; consult the full source.
 */
4035 mono_gc_wbarrier_value_copy (gpointer dest, gpointer src, int count, MonoClass *klass)
4037 if (dest >= nursery_start && dest < nursery_real_end) {
4040 DEBUG (1, fprintf (gc_debug_file, "Adding value remset at %p, count %d for class %s\n", dest, count, klass->name));
4044 * mono_gc_wbarrier_object:
4046 * Write barrier to call when obj is the result of a clone or copy of an object.
/*
 * Records the whole object (REMSET_OBJECT-tagged) in the current thread's
 * remembered set, so every reference field it now holds is treated as
 * potentially pointing into the nursery. Grows the remset when full, using
 * the same chunk-allocation pattern as the other barriers above.
 */
4049 mono_gc_wbarrier_object (MonoObject* obj)
4051 RememberedSet *rs = remembered_set;
4052 DEBUG (1, fprintf (gc_debug_file, "Adding object remset for %p\n", obj));
/* record the object in the current chunk if there is room */
4053 if (rs->store_next < rs->end_set) {
4054 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
/* chunk full: allocate a new chunk, link it in, publish to this thread */
4057 rs = alloc_remset (rs->end_set - rs->data, (void*)1);
4058 rs->next = remembered_set;
4059 remembered_set = rs;
4060 thread_info_lookup (pthread_self())->remset = rs;
4061 *(rs->store_next++) = (mword)obj | REMSET_OBJECT;
4065 * ######################################################################
4066 * ######## Other mono public interface functions.
4067 * ######################################################################
/*
 * Public entry point to force a collection of the given generation:
 * generation 0 triggers a nursery (minor) collection, anything else a major
 * collection. The address of the local 'generation' is used to record the
 * current stack bound before collecting.
 * NOTE(review): locking/brace lines are missing from this fragment.
 */
4071 mono_gc_collect (int generation)
4074 update_current_thread_stack (&generation);
4076 if (generation == 0) {
4077 collect_nursery (0);
4079 major_collection ();
/*
 * mono_gc_max_generation: body not visible in this fragment.
 * mono_gc_collection_count: returns the number of minor collections so far
 * for generation 0, otherwise the number of major collections.
 */
4086 mono_gc_max_generation (void)
4092 mono_gc_collection_count (int generation)
4094 if (generation == 0)
4095 return num_minor_gcs;
4096 return num_major_gcs;
/*
 * Returns an approximation of the live heap size: large-object-space usage
 * plus the bump-allocated portion (next_data - data) of every section.
 * As the comments below note, the figure is approximate and pinned objects
 * are not accounted for.
 */
4100 mono_gc_get_used_size (void)
4103 GCMemSection *section;
4105 tot = los_memory_usage;
4106 for (section = section_list; section; section = section->next) {
4107 /* this is approximate... */
4108 tot += section->next_data - section->data;
4110 /* FIXME: account for pinned objects */
/*
 * mono_gc_get_heap_size / mono_gc_disable / mono_gc_enable: only the
 * signatures survive in this fragment; the bodies were dropped by the
 * extraction. NOTE(review): consult the full source for their behavior.
 */
4116 mono_gc_get_heap_size (void)
4122 mono_gc_disable (void)
4130 mono_gc_enable (void)
/*
 * mono_object_is_alive / mono_gc_enable_events: bodies not visible here.
 * mono_gc_get_generation: classifies an object by address — objects inside
 * the nursery are generation 0 (the return statements themselves are on
 * lines dropped by the extraction).
 */
4138 mono_object_is_alive (MonoObject* o)
4144 mono_gc_get_generation (MonoObject *obj)
4146 if ((char*)obj >= nursery_start && (char*)obj < nursery_real_end)
4152 mono_gc_enable_events (void)
/*
 * Weak-reference (disappearing link) wrappers: adding registers the link
 * address against 'obj'; removing registers it against NULL, which clears it.
 * mono_gc_weak_link_get's body is not visible in this fragment.
 */
4157 mono_gc_weak_link_add (void **link_addr, MonoObject *obj)
4159 mono_gc_register_disappearing_link (obj, link_addr);
4164 mono_gc_weak_link_remove (void **link_addr)
4166 mono_gc_register_disappearing_link (NULL, link_addr);
4171 mono_gc_weak_link_get (void **link_addr)
/*
 * Builds a root descriptor from a pointer bitmap. If the bitmap fits in the
 * bits of a single mword left over after the type tag, it is packed inline
 * (ROOT_DESC_BITMAP); otherwise the root falls back to conservative scanning.
 * NOTE(review): 'numbits' (int) is compared against a size_t expression —
 * signed/unsigned comparison; harmless for sane numbits but worth confirming.
 */
4177 mono_gc_make_descr_from_bitmap (gsize *bitmap, int numbits)
4179 if (numbits < ((sizeof (*bitmap) * 8) - ROOT_DESC_TYPE_SHIFT)) {
4180 mword desc = ROOT_DESC_BITMAP | (bitmap [0] << ROOT_DESC_TYPE_SHIFT);
4183 /* conservative scanning */
4184 DEBUG (3, fprintf (gc_debug_file, "Conservative root descr for size: %d\n", numbits));
/*
 * Allocates a zeroed, non-moving block outside the GC heap (plain calloc)
 * and registers it as a root described by 'descr' so the GC scans it.
 * The failure path of mono_gc_register_root is on lines dropped by the
 * extraction — presumably it frees 'res' and returns NULL; confirm.
 */
4189 mono_gc_alloc_fixed (size_t size, void *descr)
4191 /* FIXME: do a single allocation */
4192 void *res = calloc (1, size);
4195 if (!mono_gc_register_root (res, size, descr)) {
/*
 * Releases a block obtained from mono_gc_alloc_fixed: deregisters the root
 * (the matching free() is presumably on a line dropped by the extraction).
 */
4203 mono_gc_free_fixed (void* addr)
4205 mono_gc_deregister_root (addr);
/*
 * Returns whether the calling thread is registered with the GC, i.e. has an
 * entry in the thread-info table (lock/return lines not visible here).
 */
4210 mono_gc_is_gc_thread (void)
4214 result = thread_info_lookup (pthread_self ()) != NULL;
/*
 * One-time GC initialization: parses MONO_GC_DEBUG (level digit plus an
 * optional log-file name, suffixed with the pid), sets up the suspend/restart
 * signal handlers and masks used to stop the world, creates the global
 * remembered set, and registers the calling thread with the GC.
 * NOTE(review): sa_flags includes SA_SIGINFO but the handlers are installed
 * via sa_handler rather than sa_sigaction — per POSIX, SA_SIGINFO selects the
 * three-argument sa_sigaction member, so this combination is suspect; confirm
 * the handlers' signatures in the full source before changing anything.
 * NOTE(review): gaps in the leftover original line numbers show missing lines
 * (locking, nursery setup, etc.) throughout this fragment.
 */
4220 mono_gc_base_init (void)
4223 struct sigaction sinfo;
/* idempotent: bail out if another caller already initialized the GC */
4226 if (gc_initialized) {
4230 gc_initialized = TRUE;
4231 gc_debug_file = stderr;
4232 /* format: MONO_GC_DEBUG=l[,filename] where l is a debug level 0-9 */
4233 if ((env = getenv ("MONO_GC_DEBUG"))) {
4234 if (env [0] >= '0' && env [0] <= '9') {
4235 gc_debug_level = atoi (env);
/* log file name gets the pid appended; fall back to stderr on open failure */
4241 char *rf = g_strdup_printf ("%s.%d", env, getpid ());
4242 gc_debug_file = fopen (rf, "wb");
4244 gc_debug_file = stderr;
/* semaphore used by suspended threads to acknowledge the stop-the-world signal */
4249 sem_init (&suspend_ack_semaphore, 0, 0);
4251 sigfillset (&sinfo.sa_mask);
4252 sinfo.sa_flags = SA_RESTART | SA_SIGINFO;
4253 sinfo.sa_handler = suspend_handler;
4254 if (sigaction (suspend_signal_num, &sinfo, NULL) != 0) {
4255 g_error ("failed sigaction");
4258 sinfo.sa_handler = restart_handler;
4259 if (sigaction (restart_signal_num, &sinfo, NULL) != 0) {
4260 g_error ("failed sigaction");
/* while suspended, block everything except the restart signal */
4263 sigfillset (&suspend_signal_mask);
4264 sigdelset (&suspend_signal_mask, restart_signal_num);
4266 global_remset = alloc_remset (1024, NULL);
4267 global_remset->next = NULL;
/* register the initializing thread itself with the GC */
4270 mono_gc_register_thread (&sinfo);
4273 #endif /* HAVE_SGEN_GC */