/*
 * sgen-alloc.c: Object allocation routines + managed allocators
 *
 * Authors:
 *	Paolo Molaro (lupus@ximian.com)
 *	Rodrigo Kumpera (kumpera@gmail.com)
 *
 * Copyright 2005-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin Inc (http://www.xamarin.com)
 * Copyright 2011 Xamarin, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
/*
 * ######################################################################
 * ########  Object allocation
 * ######################################################################
 * This section of code deals with allocating memory for objects.
 * There are several ways:
 * *) allocate large objects
 * *) allocate normal objects
 * *) fast lock-free allocation
 * *) allocation of pinned objects
 */
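
/*
 * Roughly, these map onto the entry points below: large objects go through
 * sgen_los_alloc_large_inner (), normal objects through sgen_alloc_obj ()
 * and sgen_alloc_obj_nolock (), the lock-free fast path through
 * sgen_try_alloc_obj_nolock (), and pinned objects through
 * sgen_alloc_obj_pinned ().
 */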
#include "config.h"
#ifdef HAVE_SGEN_GC

#include <string.h>

#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"
#define ALIGN_UP		SGEN_ALIGN_UP
#define ALLOC_ALIGN		SGEN_ALLOC_ALIGN
#define MAX_SMALL_OBJ_SIZE	SGEN_MAX_SMALL_OBJ_SIZE
#ifdef HEAVY_STATISTICS
static guint64 stat_objects_alloced = 0;
static guint64 stat_bytes_alloced = 0;
static guint64 stat_bytes_alloced_los = 0;
#endif
/*
 * Allocation is done from a Thread Local Allocation Buffer (TLAB). TLABs are allocated
 * from nursery fragments.
 * tlab_next is the pointer to the space inside the TLAB where the next object will
 * be allocated.
 * tlab_temp_end is the pointer to the end of the temporary space reserved for
 * the allocation: it allows us to set the scan starts at reasonable intervals.
 * tlab_real_end points to the end of the TLAB.
 *
 * FIXME: What is faster, a TLS variable pointing to a structure, or separate TLS
 * variables for next+temp_end?
 */
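
/*
 * Illustrative sketch (not part of the build): the TLAB fast path is a
 * plain bump-pointer allocation against the per-thread pointers above.
 * This is a simplified model of what sgen_alloc_obj_nolock () does below,
 * not the exact code:
 *
 *	static void*
 *	bump_alloc (size_t aligned_size)
 *	{
 *		char *p = tlab_next;
 *		char *new_next = p + aligned_size;
 *		if (new_next < tlab_temp_end) {
 *			// fast path: still inside the current scan-start chunk
 *			tlab_next = new_next;
 *			return p;
 *		}
 *		// slow path: bump tlab_temp_end, refill the TLAB, or fall
 *		// back to the nursery/degraded allocation
 *		return NULL;
 *	}
 */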
#ifdef HAVE_KW_THREAD
static __thread char *tlab_start;
static __thread char *tlab_next;
static __thread char *tlab_temp_end;
static __thread char *tlab_real_end;
/* Used by the managed allocator/wbarrier */
static __thread char **tlab_next_addr MONO_ATTR_USED;
#ifndef SGEN_WITHOUT_MONO
static __thread volatile int *in_critical_region_addr MONO_ATTR_USED;
#endif
#endif

#ifdef HAVE_KW_THREAD
#define TLAB_START	tlab_start
#define TLAB_NEXT	tlab_next
#define TLAB_TEMP_END	tlab_temp_end
#define TLAB_REAL_END	tlab_real_end
#else
#define TLAB_START	(__thread_info__->tlab_start)
#define TLAB_NEXT	(__thread_info__->tlab_next)
#define TLAB_TEMP_END	(__thread_info__->tlab_temp_end)
#define TLAB_REAL_END	(__thread_info__->tlab_real_end)
#endif
static GCObject*
alloc_degraded (GCVTable vtable, size_t size, gboolean for_mature)
{
	GCObject *p;

	if (!for_mature) {
		sgen_client_degraded_allocation (size);
		SGEN_ATOMIC_ADD_P (degraded_mode, size);
		sgen_ensure_free_space (size, GENERATION_OLD);
	} else {
		if (sgen_need_major_collection (size))
			sgen_perform_collection (size, GENERATION_OLD, "mature allocation failure", !for_mature);
	}

	p = major_collector.alloc_degraded (vtable, size);

	if (p)
		binary_protocol_alloc_degraded (p, vtable, size, sgen_client_get_provenance ());

	return p;
}
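
/*
 * Note: alloc_degraded () serves two kinds of caller below: nursery
 * allocations that could not be satisfied even after a collection
 * (for_mature == FALSE, see sgen_alloc_obj_nolock ()) and explicit mature
 * allocation (for_mature == TRUE, see sgen_alloc_obj_mature ()).
 */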
static void
zero_tlab_if_necessary (void *p, size_t size)
{
	if (nursery_clear_policy == CLEAR_AT_TLAB_CREATION || nursery_clear_policy == CLEAR_AT_TLAB_CREATION_DEBUG) {
		memset (p, 0, size);
	} else {
		/*
		 * This function is called for all allocations in
		 * TLABs.  TLABs originate from fragments, which are
		 * initialized to be faux arrays.  The remainder of
		 * the fragments are zeroed out at initialization for
		 * CLEAR_AT_GC, so here we just need to make sure that
		 * the array header is zeroed.  Since we don't know
		 * whether we're called for the start of a fragment or
		 * for somewhere in between, we zero in any case, just
		 * to make sure.
		 */
		sgen_client_zero_array_fill_header (p, size);
	}
}
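
/*
 * Summary of the clearing policies as used above (sketch): with
 * CLEAR_AT_TLAB_CREATION (and its DEBUG variant) the whole allocation is
 * memset to zero here, so object fields need no further clearing; with
 * CLEAR_AT_GC the fragment memory was already zeroed when the fragment was
 * created, so only the faux array header needs to be cleared.
 */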
/*
 * Provide a variant that takes just the vtable for small fixed-size objects.
 * The aligned size is already computed and stored in vt->gc_descr.
 * Note: every SGEN_SCAN_START_SIZE or so we are given the chance to do some special
 * processing. We can keep track of where objects start, for example,
 * so when we scan the thread stacks for pinned objects, we can start
 * a search for the pinned object in SGEN_SCAN_START_SIZE chunks.
 */
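
/*
 * Illustrative sketch (hypothetical helper, not part of this file): scan
 * starts record a known object boundary for every SGEN_SCAN_START_SIZE
 * chunk, so resolving an arbitrary nursery address to the object that
 * contains it only has to walk the objects of one chunk:
 *
 *	static char*
 *	find_object_containing (char *addr, char **scan_starts, char *nursery_start)
 *	{
 *		size_t chunk = (addr - nursery_start) / SGEN_SCAN_START_SIZE;
 *		char *obj = scan_starts [chunk];
 *		// walk forward object by object until 'addr' falls inside one
 *		while (obj + ALIGN_UP (object_size (obj)) <= addr)
 *			obj += ALIGN_UP (object_size (obj));
 *		return obj;
 *	}
 *
 * where object_size () stands in for the client's object-size query.
 */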
GCObject*
sgen_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	HEAVY_STAT (++stat_objects_alloced);
	if (real_size <= SGEN_MAX_SMALL_OBJ_SIZE)
		HEAVY_STAT (stat_bytes_alloced += size);
	else
		HEAVY_STAT (stat_bytes_alloced_los += size);

	size = ALIGN_UP (size);

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE);
				if (!degraded_mode && sgen_can_alloc_size (size) && real_size <= SGEN_MAX_SMALL_OBJ_SIZE) {
					/* FIXME: */
					g_assert_not_reached ();
				}
			}
		} else if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
	}
	/*
	 * We must already have the lock here instead of after the
	 * fast path because we might be interrupted in the fast path
	 * (after confirming that new_next < TLAB_TEMP_END) by the GC,
	 * and we'll end up allocating an object in a fragment which
	 * no longer belongs to us.
	 *
	 * The managed allocator does not do this, but it's treated
	 * specially by the world-stopping code.
	 */
	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE) {
		p = (void **)sgen_los_alloc_large_inner (vtable, ALIGN_UP (real_size));
	} else {
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;
		TLAB_NEXT = new_next;

		if (G_LIKELY (new_next < TLAB_TEMP_END)) {
			/* Fast path */

			/*
			 * FIXME: We might need a memory barrier here so the change to tlab_next is
			 * visible before the vtable store.
			 */

			CANARIFY_ALLOC(p,real_size);
			SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
			binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
			g_assert (*p == NULL);
			mono_atomic_store_seq (p, vtable);

			return (GCObject*)p;
		}
		/* Slow path */

		/* there are two cases: the object is too big or we run out of space in the TLAB */
		/* we also reach here when the thread does its first allocation after a minor
		 * collection, since the tlab_ variables are initialized to NULL.
		 * there can be another case (from ORP), if we cooperate with the runtime a bit:
		 * objects that need finalizers can have the high bit set in their size
		 * so the above check fails and we can readily add the object to the queue.
		 * This avoids taking the GC lock again when registering, but this is moot when
		 * doing thread-local allocation, so it may not be a good idea.
		 */
		if (TLAB_NEXT >= TLAB_REAL_END) {
			int available_in_tlab;
			/*
			 * Run out of space in the TLAB. When this happens, some amount of space
			 * remains in the TLAB, but not enough to satisfy the current allocation
			 * request. Currently, we retire the TLAB in all cases, later we could
			 * keep it if the remaining space is above a threshold, and satisfy the
			 * allocation directly from the nursery.
			 */

			/* when running in degraded mode, we continue allocating that way
			 * for a while, to decrease the number of useless nursery collections.
			 */
			if (degraded_mode && degraded_mode < DEFAULT_NURSERY_SIZE)
				return alloc_degraded (vtable, size, FALSE);
			available_in_tlab = (int)(TLAB_REAL_END - TLAB_NEXT);	/* we'll never have TLABs > 2 GB */
			if (size > tlab_size || available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
				/* Allocate directly from the nursery */
				p = (void **)sgen_nursery_alloc (size);
				if (!p) {
					/*
					 * We couldn't allocate from the nursery, so we try
					 * collecting.  Even after the collection, we might
					 * still not have enough memory to allocate the
					 * object.  The reason will most likely be that we've
					 * run out of memory, but there is the theoretical
					 * possibility that other threads might have consumed
					 * the freed up memory ahead of us.
					 *
					 * What we do in this case is allocate degraded, i.e.,
					 * from the major heap.
					 *
					 * Ideally we'd like to detect the case of other
					 * threads allocating ahead of us and loop (if we
					 * always loop we will loop endlessly in the case of
					 * OOM).
					 */
					sgen_ensure_free_space (real_size, GENERATION_NURSERY);

					p = (void **)sgen_nursery_alloc (size);
				}

				if (!p)
					return alloc_degraded (vtable, size, FALSE);

				zero_tlab_if_necessary (p, size);
			} else {
				size_t alloc_size = 0;
				if (TLAB_START)
					SGEN_LOG (3, "Retire TLAB: %p-%p [%ld]", TLAB_START, TLAB_REAL_END, (long)(TLAB_REAL_END - TLAB_NEXT - size));
				sgen_nursery_retire_region (p, available_in_tlab);

				p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				if (!p) {
					/* See comment above in similar case. */
					sgen_ensure_free_space (tlab_size, GENERATION_NURSERY);

					p = (void **)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
				}

				if (!p)
					return alloc_degraded (vtable, size, FALSE);

				/* Allocate a new TLAB from the current nursery fragment */
				TLAB_START = (char*)p;
				TLAB_NEXT = TLAB_START;
				TLAB_REAL_END = TLAB_START + alloc_size;
				TLAB_TEMP_END = TLAB_START + MIN (SGEN_SCAN_START_SIZE, alloc_size);

				zero_tlab_if_necessary (TLAB_START, alloc_size);

				/* Allocate from the TLAB */
				p = (void **)TLAB_NEXT;
				TLAB_NEXT += size;
				sgen_set_nursery_scan_start ((char*)p);
			}
		} else {
			/* Reached tlab_temp_end */

			/* record the scan start so we can find pinned objects more easily */
			sgen_set_nursery_scan_start ((char*)p);
			/* we just bump tlab_temp_end as well */
			TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
			SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
		}
		CANARIFY_ALLOC(p,real_size);
	}
	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
		mono_atomic_store_seq (p, vtable);
	}

	return (GCObject*)p;
}
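
/*
 * Illustrative usage (sketch): sgen_alloc_obj_nolock () must be called
 * with the GC lock held (or from within a critical region):
 *
 *	LOCK_GC;
 *	res = sgen_alloc_obj_nolock (vtable, size);
 *	UNLOCK_GC;
 *
 * sgen_alloc_obj () below falls back to exactly this pattern when the
 * lock-free fast path fails.
 */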
static GCObject*
sgen_try_alloc_obj_nolock (GCVTable vtable, size_t size)
{
	/* FIXME: handle OOM */
	void **p;
	char *new_next;
	size_t real_size = size;
	TLAB_ACCESS_INIT;

	CANARIFY_SIZE(size);

	size = ALIGN_UP (size);
	SGEN_ASSERT (9, real_size >= SGEN_CLIENT_MINIMUM_OBJECT_SIZE, "Object too small");

	SGEN_ASSERT (6, sgen_vtable_get_descriptor (vtable), "VTable without descriptor");

	if (real_size > SGEN_MAX_SMALL_OBJ_SIZE)
		return NULL;

	if (G_UNLIKELY (size > tlab_size)) {
		/* Allocate directly from the nursery */
		p = (void **)sgen_nursery_alloc (size);
		if (!p)
			return NULL;
		sgen_set_nursery_scan_start ((char*)p);

		/* FIXME: we should use weak memory ops here. Should help especially on x86. */
		zero_tlab_if_necessary (p, size);
	} else {
		int available_in_tlab;
		char *real_end;
		/* tlab_next and tlab_temp_end are TLS vars so accessing them might be expensive */

		p = (void**)TLAB_NEXT;
		/* FIXME: handle overflow */
		new_next = (char*)p + size;

		real_end = TLAB_REAL_END;
		available_in_tlab = (int)(real_end - (char*)p);	/* we'll never have TLABs > 2 GB */

		if (G_LIKELY (new_next < real_end)) {
			TLAB_NEXT = new_next;

			/* Second case, we overflowed temp end */
			if (G_UNLIKELY (new_next >= TLAB_TEMP_END)) {
				sgen_set_nursery_scan_start (new_next);
				/* we just bump tlab_temp_end as well */
				TLAB_TEMP_END = MIN (TLAB_REAL_END, TLAB_NEXT + SGEN_SCAN_START_SIZE);
				SGEN_LOG (5, "Expanding local alloc: %p-%p", TLAB_NEXT, TLAB_TEMP_END);
			}
		} else if (available_in_tlab > SGEN_MAX_NURSERY_WASTE) {
			/* Allocate directly from the nursery */
			p = (void **)sgen_nursery_alloc (size);
			if (!p)
				return NULL;

			zero_tlab_if_necessary (p, size);
		} else {
			size_t alloc_size = 0;

			sgen_nursery_retire_region (p, available_in_tlab);
			new_next = (char *)sgen_nursery_alloc_range (tlab_size, size, &alloc_size);
			p = (void**)new_next;
			if (!p)
				return NULL;

			TLAB_START = (char*)new_next;
			TLAB_NEXT = new_next + size;
			TLAB_REAL_END = new_next + alloc_size;
			TLAB_TEMP_END = new_next + MIN (SGEN_SCAN_START_SIZE, alloc_size);
			sgen_set_nursery_scan_start ((char*)p);

			zero_tlab_if_necessary (new_next, alloc_size);
		}
	}
	HEAVY_STAT (++stat_objects_alloced);
	HEAVY_STAT (stat_bytes_alloced += size);

	CANARIFY_ALLOC(p,real_size);
	SGEN_LOG (6, "Allocated object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
	binary_protocol_alloc (p, vtable, size, sgen_client_get_provenance ());
	g_assert (*p == NULL); /* FIXME: disable this in non-debug builds */

	mono_atomic_store_seq (p, vtable);

	return (GCObject*)p;
}
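
/*
 * Note: unlike sgen_alloc_obj_nolock (), the "try" variant above never
 * collects and never falls back to degraded allocation: if it cannot
 * satisfy the request from the TLAB or the nursery it returns NULL, and
 * the caller retries on the locked slow path.
 */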
GCObject*
sgen_alloc_obj (GCVTable vtable, size_t size)
{
	GCObject *res;
	TLAB_ACCESS_INIT;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;

	if (G_UNLIKELY (has_per_allocation_action)) {
		static int alloc_count;
		int current_alloc = InterlockedIncrement (&alloc_count);

		if (verify_before_allocs) {
			if ((current_alloc % verify_before_allocs) == 0)
				sgen_check_whole_heap_stw ();
		}
		if (collect_before_allocs) {
			if (((current_alloc % collect_before_allocs) == 0) && nursery_section) {
				LOCK_GC;
				sgen_perform_collection (0, GENERATION_NURSERY, "collect-before-alloc-triggered", TRUE);
				UNLOCK_GC;
			}
		}
	}

	ENTER_CRITICAL_REGION;
	res = sgen_try_alloc_obj_nolock (vtable, size);
	if (res) {
		EXIT_CRITICAL_REGION;
		return res;
	}
	EXIT_CRITICAL_REGION;

	LOCK_GC;
	res = sgen_alloc_obj_nolock (vtable, size);
	UNLOCK_GC;
	return res;
}
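
/*
 * Design note (sketch): the fast path above runs between
 * ENTER_CRITICAL_REGION and EXIT_CRITICAL_REGION instead of taking the GC
 * lock.  The flag behind in_critical_region_addr lets the world-stopping
 * code treat a thread with an allocation in flight specially (see the
 * comment in sgen_alloc_obj_nolock () about why the locked path must hold
 * the lock even before its fast path), which is what makes running
 * sgen_try_alloc_obj_nolock () without the lock safe.
 */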
/*
 * To be used for interned strings and possibly MonoThread, reflection handles.
 * We may want to explicitly free these objects.
 */
GCObject*
sgen_alloc_obj_pinned (GCVTable vtable, size_t size)
{
	GCObject *p;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;

	if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
		/* large objects are always pinned anyway */
		p = (GCObject *)sgen_los_alloc_large_inner (vtable, size);
	} else {
		SGEN_ASSERT (9, sgen_client_vtable_is_inited (vtable), "class %s:%s is not initialized", sgen_client_vtable_get_namespace (vtable), sgen_client_vtable_get_name (vtable));
		p = major_collector.alloc_small_pinned_obj (vtable, size, SGEN_VTABLE_HAS_REFERENCES (vtable));
	}
	if (G_LIKELY (p)) {
		SGEN_LOG (6, "Allocated pinned object %p, vtable: %p (%s), size: %zd", p, vtable, sgen_client_vtable_get_name (vtable), size);
		binary_protocol_alloc_pinned (p, vtable, size, sgen_client_get_provenance ());
	}
	UNLOCK_GC;
	return p;
}
GCObject*
sgen_alloc_obj_mature (GCVTable vtable, size_t size)
{
	GCObject *res;

	if (!SGEN_CAN_ALIGN_UP (size))
		return NULL;
	size = ALIGN_UP (size);

	LOCK_GC;
	res = alloc_degraded (vtable, size, TRUE);
	UNLOCK_GC;

	return res;
}
void
sgen_init_tlab_info (SgenThreadInfo* info)
{
#ifndef HAVE_KW_THREAD
	SgenThreadInfo *__thread_info__ = info;
#endif

	info->tlab_start_addr = &TLAB_START;
	info->tlab_next_addr = &TLAB_NEXT;
	info->tlab_temp_end_addr = &TLAB_TEMP_END;
	info->tlab_real_end_addr = &TLAB_REAL_END;

#ifdef HAVE_KW_THREAD
	tlab_next_addr = &tlab_next;
#ifndef SGEN_WITHOUT_MONO
	in_critical_region_addr = &info->client_info.in_critical_region;
#endif
#endif
}
/*
 * Clear the thread local TLAB variables for all threads.
 */
void
sgen_clear_tlabs (void)
{
	FOREACH_THREAD (info) {
		/* A new TLAB will be allocated when the thread does its first allocation */
		*info->tlab_start_addr = NULL;
		*info->tlab_next_addr = NULL;
		*info->tlab_temp_end_addr = NULL;
		*info->tlab_real_end_addr = NULL;
	} FOREACH_THREAD_END
}
void
sgen_init_allocator (void)
{
#if defined(HAVE_KW_THREAD) && !defined(SGEN_WITHOUT_MONO)
	int tlab_next_addr_offset = -1;
	int tlab_temp_end_offset = -1;
	int in_critical_region_addr_offset = -1;

	MONO_THREAD_VAR_OFFSET (tlab_next_addr, tlab_next_addr_offset);
	MONO_THREAD_VAR_OFFSET (tlab_temp_end, tlab_temp_end_offset);
	MONO_THREAD_VAR_OFFSET (in_critical_region_addr, in_critical_region_addr_offset);

	mono_tls_key_set_offset (TLS_KEY_SGEN_TLAB_NEXT_ADDR, tlab_next_addr_offset);
	mono_tls_key_set_offset (TLS_KEY_SGEN_TLAB_TEMP_END, tlab_temp_end_offset);
	mono_tls_key_set_offset (TLS_KEY_SGEN_IN_CRITICAL_REGION_ADDR, in_critical_region_addr_offset);
#endif

#ifdef HEAVY_STATISTICS
	mono_counters_register ("# objects allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_alloced);
	mono_counters_register ("bytes allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced);
	mono_counters_register ("bytes allocated in LOS", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced_los);
#endif
}

#endif /*HAVE_SGEN_GC*/