/**
 * \file
 * The Mark & Sweep major collector.
 *
 * Author:
 *	Mark Probst <mark.probst@gmail.com>
 *
 * Copyright 2009-2010 Novell, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */

#include "config.h"

#ifdef HAVE_SGEN_GC

#include <math.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>

#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-cardtable.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-layout-stats.h"
#include "mono/sgen/sgen-pointer-queue.h"
#include "mono/sgen/sgen-array-list.h"
#include "mono/sgen/sgen-pinning.h"
#include "mono/sgen/sgen-workers.h"
#include "mono/sgen/sgen-thread-pool.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"
#include "mono/utils/mono-proclib.h"

static int ms_block_size;

/*
 * Blocks must be at least this size: if we detect a page size
 * smaller than this, we use this size instead.
 */
#define MS_BLOCK_SIZE_MIN (1024 * 16)

#define CARDS_PER_BLOCK (ms_block_size / CARD_SIZE_IN_BYTES)

/*
 * Don't allocate single blocks; instead, allocate a contiguous batch of
 * this many blocks in one go.  This must be a power of two.
 */
#define MS_BLOCK_ALLOC_NUM      32

#define MS_NUM_MARK_WORDS       ((ms_block_size / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8))

/*
 * Use this instead of sizeof (MSBlockInfo) since the mark_words
 * array size depends on page size at runtime.
 */
#define SIZEOF_MS_BLOCK_INFO (sizeof (MSBlockInfo) + sizeof (guint32) * (MS_NUM_MARK_WORDS - MONO_ZERO_LEN_ARRAY))

/*
 * Number of bytes before the first object in a block.  At the start
 * of a block is the MSBlockHeader, then optional padding, then come
 * the objects, so this must be >= SIZEOF_MS_BLOCK_INFO.
 */
#define MS_BLOCK_SKIP   ((SIZEOF_MS_BLOCK_INFO + 15) & ~15)

#define MS_BLOCK_FREE   (ms_block_size - MS_BLOCK_SKIP)
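
/*
 * Illustrative example (the values are an assumption, not normative): with
 * ms_block_size == 16384 and SGEN_ALLOC_ALIGN == 8 there are 16384 / 8 = 2048
 * possible mark positions per block, so MS_NUM_MARK_WORDS is
 * (2048 + 31) / 32 = 64 words of mark bits, and MS_BLOCK_SKIP is
 * SIZEOF_MS_BLOCK_INFO rounded up to a multiple of 16.
 */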

/*
 * Blocks progress from one state to the next:
 *
 * SWEPT           The block is fully swept.  It might or might not be in
 *                 a free list.
 *
 * MARKING         The block might or might not contain live objects.  If
 *                 we're in between an initial collection pause and the
 *                 finishing pause, the block might or might not be in a
 *                 free list.
 *
 * CHECKING        The sweep thread is investigating the block to determine
 *                 whether or not it contains live objects.  The block is
 *                 not in a free list.
 *
 * NEED_SWEEPING   The block contains live objects but has not yet been
 *                 swept.  It also contains free slots.  It is in a block
 *                 free list.
 *
 * SWEEPING        The block is being swept.  It might be in a free list.
 */
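
/*
 * A rough sketch of the transitions (the sweep code below is authoritative):
 *
 *   SWEPT          -> MARKING        a major collection starts
 *   MARKING        -> CHECKING       sweep starts examining the block
 *   CHECKING       -> (block freed)  no live objects remain
 *   CHECKING       -> SWEPT          fully occupied, nothing to sweep
 *   CHECKING       -> NEED_SWEEPING  live objects and free slots coexist
 *   NEED_SWEEPING  -> SWEEPING       the sweep thread or a lazy sweep takes it
 *   SWEEPING       -> SWEPT          sweeping the block finishes
 */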

enum {
        BLOCK_STATE_SWEPT,
        BLOCK_STATE_MARKING,
        BLOCK_STATE_CHECKING,
        BLOCK_STATE_NEED_SWEEPING,
        BLOCK_STATE_SWEEPING
};

typedef struct _MSBlockInfo MSBlockInfo;
struct _MSBlockInfo {
        guint16 obj_size;
        /*
         * FIXME: Do we even need this? It's only used during sweep and might be worth
         * recalculating to save the space.
         */
        guint16 obj_size_index;
        /* FIXME: Reduce this - it only needs a byte. */
        volatile gint32 state;
        gint16 nused;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
        unsigned int has_pinned : 1;    /* means cannot evacuate */
        unsigned int is_to_space : 1;
        void ** volatile free_list;
        MSBlockInfo * volatile next_free;
        guint8 * volatile cardtable_mod_union;
        guint32 mark_words [MONO_ZERO_LEN_ARRAY];
};

#define MS_BLOCK_FOR_BLOCK_INFO(b)      ((char*)(b))

#define MS_BLOCK_OBJ(b,i)               ((GCObject *)(MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i)))
#define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size)             (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i))
#define MS_BLOCK_DATA_FOR_OBJ(o)        ((char*)((mword)(o) & ~(mword)(ms_block_size - 1)))

typedef struct {
        MSBlockInfo info;
} MSBlockHeader;

#define MS_BLOCK_FOR_OBJ(o)             (&((MSBlockHeader*)MS_BLOCK_DATA_FOR_OBJ ((o)))->info)

/* object index will always be small */
#define MS_BLOCK_OBJ_INDEX(o,b) ((int)(((char*)(o) - (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP)) / (b)->obj_size))

// casting to int is fine since blocks are small (tens of kilobytes at most)
#define MS_CALC_MARK_BIT(w,b,o)         do {                            \
                int i = ((int)((char*)(o) - MS_BLOCK_DATA_FOR_OBJ ((o)))) >> SGEN_ALLOC_ALIGN_BITS; \
                (w) = i >> 5;                                           \
                (b) = i & 31;                                           \
        } while (0)
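
/*
 * Illustrative example (assuming SGEN_ALLOC_ALIGN_BITS == 3): for an object
 * at offset 0x1200 within its block, i = 0x1200 >> 3 = 576, so its mark bit
 * is bit 576 & 31 = 0 of mark word 576 >> 5 = 18.
 */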

#define MS_MARK_BIT(bl,w,b)     ((bl)->mark_words [(w)] & (ONE_P << (b)))
#define MS_SET_MARK_BIT(bl,w,b) ((bl)->mark_words [(w)] |= (ONE_P << (b)))
#define MS_SET_MARK_BIT_PAR(bl,w,b,first)       do {                    \
                guint32 tmp_mark_word = (bl)->mark_words [(w)];         \
                guint32 old_mark_word;                                  \
                first = FALSE;                                          \
                while (!(tmp_mark_word & (ONE_P << (b)))) {             \
                        old_mark_word = tmp_mark_word;                  \
                        tmp_mark_word = InterlockedCompareExchange ((volatile gint32*)&(bl)->mark_words [w], old_mark_word | (ONE_P << (b)), old_mark_word); \
                        if (tmp_mark_word == old_mark_word) {           \
                                first = TRUE;                           \
                                break;                                  \
                        }                                               \
                }                                                       \
        } while (0)
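
/*
 * Typical usage, a sketch mirroring MS_MARK_OBJECT_AND_ENQUEUE_PAR below:
 * `first` ends up TRUE only for the one thread whose CAS actually set the
 * bit, so exactly one worker processes the object:
 *
 *   gboolean first;
 *   MS_SET_MARK_BIT_PAR (block, word, bit, first);
 *   if (first)
 *           GRAY_OBJECT_ENQUEUE_PARALLEL (queue, obj, desc);
 */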


#define MS_OBJ_ALLOCED(o,b)     (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + ms_block_size))
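
/*
 * A slot's first word is the vtable pointer if the slot is allocated, or a
 * free-list link if it is free.  Free-list links are either NULL or point
 * into the same block, so a non-NULL first word pointing outside the block
 * means the slot holds an allocated object.
 */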

#define MS_BLOCK_OBJ_SIZE_FACTOR        (pow (2.0, 1.0 / 3))

/*
 * This way we can look up block object size indexes for sizes up to
 * 256 bytes with a single load.
 */
#define MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES      32

static int *block_obj_sizes;
static int num_block_obj_sizes;
static int fast_block_obj_size_indexes [MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES];

#define MS_BLOCK_FLAG_PINNED    1
#define MS_BLOCK_FLAG_REFS      2

#define MS_BLOCK_TYPE_MAX       4

static gboolean *evacuate_block_obj_sizes;
static float evacuation_threshold = 0.666f;

static gboolean lazy_sweep = TRUE;

enum {
        SWEEP_STATE_SWEPT,
        SWEEP_STATE_NEED_SWEEPING,
        SWEEP_STATE_SWEEPING,
        SWEEP_STATE_SWEEPING_AND_ITERATING,
        SWEEP_STATE_COMPACTING
};

static volatile int sweep_state = SWEEP_STATE_SWEPT;

static gboolean concurrent_mark;
static gboolean concurrent_sweep = TRUE;

int sweep_pool_context = -1;

#define BLOCK_IS_TAGGED_HAS_REFERENCES(bl)      SGEN_POINTER_IS_TAGGED_1 ((bl))
#define BLOCK_TAG_HAS_REFERENCES(bl)            SGEN_POINTER_TAG_1 ((bl))

#define BLOCK_IS_TAGGED_CHECKING(bl)            SGEN_POINTER_IS_TAGGED_2 ((bl))
#define BLOCK_TAG_CHECKING(bl)                  SGEN_POINTER_TAG_2 ((bl))

#define BLOCK_UNTAG(bl)                         ((MSBlockInfo *)SGEN_POINTER_UNTAG_12 ((bl)))

#define BLOCK_TAG(bl)                           ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))

/* all allocated blocks in the system */
static SgenArrayList allocated_blocks = SGEN_ARRAY_LIST_INIT (NULL, sgen_array_list_default_is_slot_set, sgen_array_list_default_cas_setter, INTERNAL_MEM_PIN_QUEUE);

/* non-allocated block free-list */
static void *empty_blocks = NULL;
static size_t num_empty_blocks = 0;
static gboolean compact_blocks = FALSE;

/*
 * We can also iterate the block list while sweep is in progress, but we
 * need to account for blocks that will be checked for sweeping and even
 * freed in the process.
 */
#define FOREACH_BLOCK_NO_LOCK(bl) {                                     \
        volatile gpointer *slot;                                                \
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {        \
                (bl) = BLOCK_UNTAG (*slot);                             \
                if (!(bl))                                              \
                        continue;
#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr) {                   \
        volatile gpointer *slot;                                                \
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {        \
                (bl) = (MSBlockInfo *) (*slot);                 \
                if (!(bl))                                              \
                        continue;                                       \
                (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
                (bl) = BLOCK_UNTAG ((bl));
#define END_FOREACH_BLOCK_NO_LOCK       } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }

#define FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK(bl,begin,end,index,hr) {     \
        volatile gpointer *slot;                                        \
        SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&allocated_blocks, begin, end, slot, index) { \
                (bl) = (MSBlockInfo *) (*slot);                         \
                if (!(bl))                                              \
                        continue;                                       \
                (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
                (bl) = BLOCK_UNTAG ((bl));
#define END_FOREACH_BLOCK_RANGE_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; }

static volatile size_t num_major_sections = 0;
/*
 * One free block list for each block object size.  We add and remove blocks from these
 * lists lock-free via CAS.
 *
 * Blocks accessed/removed from `free_block_lists`:
 *   from the mutator (with GC lock held)
 *   in nursery collections
 *   in non-concurrent major collections
 *   in the finishing pause of concurrent major collections (whole list is cleared)
 *
 * Blocks added to `free_block_lists`:
 *   in the sweeping thread
 *   during nursery collections
 *   from domain clearing (with the world stopped and no sweeping happening)
 *
 * Of these, only the sweep thread runs without the GC lock.  The sweep
 * thread only ever adds blocks to the free list, so the ABA problem can't occur.
 */
static MSBlockInfo * volatile *free_block_lists [MS_BLOCK_TYPE_MAX];
static MonoNativeTlsKey worker_block_free_list_key;

static guint64 stat_major_blocks_alloced = 0;
static guint64 stat_major_blocks_freed = 0;
static guint64 stat_major_blocks_lazy_swept = 0;

static guint64 stat_major_blocks_freed_ideal = 0;
static guint64 stat_major_blocks_freed_less_ideal = 0;
static guint64 stat_major_blocks_freed_individual = 0;
static guint64 stat_major_blocks_alloced_less_ideal = 0;

#ifdef SGEN_COUNT_NUMBER_OF_MAJOR_OBJECTS_MARKED
static guint64 num_major_objects_marked = 0;
#define INC_NUM_MAJOR_OBJECTS_MARKED()  (++num_major_objects_marked)
#else
#define INC_NUM_MAJOR_OBJECTS_MARKED()
#endif

#ifdef SGEN_HEAVY_BINARY_PROTOCOL
static mono_mutex_t scanned_objects_list_lock;
static SgenPointerQueue scanned_objects_list;

static void
add_scanned_object (void *ptr)
{
        if (!binary_protocol_is_enabled ())
                return;

        mono_os_mutex_lock (&scanned_objects_list_lock);
        sgen_pointer_queue_add (&scanned_objects_list, ptr);
        mono_os_mutex_unlock (&scanned_objects_list_lock);
}
#endif

static gboolean sweep_block (MSBlockInfo *block);

static int
ms_find_block_obj_size_index (size_t size)
{
        int i;
        SGEN_ASSERT (9, size <= SGEN_MAX_SMALL_OBJ_SIZE, "size %zd is bigger than max small object size %d", size, SGEN_MAX_SMALL_OBJ_SIZE);
        for (i = 0; i < num_block_obj_sizes; ++i)
                if (block_obj_sizes [i] >= size)
                        return i;
        g_error ("no object of size %zd\n", size);
        return -1;
}

#define FREE_BLOCKS_FROM(lists,p,r)     (lists [((p) ? MS_BLOCK_FLAG_PINNED : 0) | ((r) ? MS_BLOCK_FLAG_REFS : 0)])
#define FREE_BLOCKS(p,r)                (FREE_BLOCKS_FROM (free_block_lists, (p), (r)))
#define FREE_BLOCKS_LOCAL(p,r)          (FREE_BLOCKS_FROM (((MSBlockInfo***)mono_native_tls_get_value (worker_block_free_list_key)), (p), (r)))
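
/*
 * Illustrative example: FREE_BLOCKS (TRUE, FALSE) selects
 * free_block_lists [MS_BLOCK_FLAG_PINNED], the lists for pinned blocks
 * without references; the two flags combine into an index in 0..3, which is
 * why MS_BLOCK_TYPE_MAX is 4.
 */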

#define MS_BLOCK_OBJ_SIZE_INDEX(s)                              \
        (((s)+7)>>3 < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES ?      \
         fast_block_obj_size_indexes [((s)+7)>>3] :             \
         ms_find_block_obj_size_index ((s)))
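
/*
 * Illustrative example: for s == 24, (24 + 7) >> 3 == 3, which is below
 * MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES, so the index is read from the fast
 * table with a single load; larger sizes fall back to the linear search in
 * ms_find_block_obj_size_index ().
 */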

static void*
major_alloc_heap (mword nursery_size, mword nursery_align)
{
        char *start;
        if (nursery_align)
                start = (char *)sgen_alloc_os_memory_aligned (nursery_size, nursery_align, (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE), "nursery", MONO_MEM_ACCOUNT_SGEN_NURSERY);
        else
                start = (char *)sgen_alloc_os_memory (nursery_size, (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE), "nursery", MONO_MEM_ACCOUNT_SGEN_NURSERY);

        return start;
}

static void
update_heap_boundaries_for_block (MSBlockInfo *block)
{
        sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size);
}

/*
 * Thread safe
 */
static void*
ms_get_empty_block (void)
{
        char *p;
        int i;
        void *block, *empty, *next;

 retry:
        if (!empty_blocks) {
                /*
                 * We try allocating MS_BLOCK_ALLOC_NUM blocks first.  If that's
                 * unsuccessful, we halve the number of blocks and try again, until we're at
                 * 1.  If that doesn't work either, we assert.
                 */
                int alloc_num = MS_BLOCK_ALLOC_NUM;
                for (;;) {
                        p = (char *)sgen_alloc_os_memory_aligned (ms_block_size * alloc_num, ms_block_size,
                                (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE),
                                alloc_num == 1 ? "major heap section" : NULL, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
                        if (p)
                                break;
                        alloc_num >>= 1;
                }

                for (i = 0; i < alloc_num; ++i) {
                        block = p;
                        /*
                         * We do the free list update one after the
                         * other so that other threads can use the new
                         * blocks as quickly as possible.
                         */
                        do {
                                empty = empty_blocks;
                                *(void**)block = empty;
                        } while (SGEN_CAS_PTR ((gpointer*)&empty_blocks, block, empty) != empty);
                        p += ms_block_size;
                }

                SGEN_ATOMIC_ADD_P (num_empty_blocks, alloc_num);

                stat_major_blocks_alloced += alloc_num;
#if SIZEOF_VOID_P != 8
                if (alloc_num != MS_BLOCK_ALLOC_NUM)
                        stat_major_blocks_alloced_less_ideal += alloc_num;
#endif
        }

        do {
                empty = empty_blocks;
                if (!empty)
                        goto retry;
                block = empty;
                next = *(void**)block;
        } while (SGEN_CAS_PTR (&empty_blocks, next, empty) != empty);

        SGEN_ATOMIC_ADD_P (num_empty_blocks, -1);

        *(void**)block = NULL;

        g_assert (!((mword)block & (ms_block_size - 1)));

        return block;
}

/*
 * This doesn't actually free a block immediately, but enqueues it into the `empty_blocks`
 * list, where it will either be freed later on, or reused in nursery collections.
 */
static void
ms_free_block (MSBlockInfo *info)
{
        void *empty;
        char *block = MS_BLOCK_FOR_BLOCK_INFO (info);

        sgen_memgov_release_space (ms_block_size, SPACE_MAJOR);
        if (info->cardtable_mod_union)
                sgen_card_table_free_mod_union (info->cardtable_mod_union, block, ms_block_size);
        memset (block, 0, ms_block_size);

        do {
                empty = empty_blocks;
                *(void**)block = empty;
        } while (SGEN_CAS_PTR (&empty_blocks, block, empty) != empty);

        SGEN_ATOMIC_ADD_P (num_empty_blocks, 1);

        binary_protocol_block_free (block, ms_block_size);
}

static gboolean
sweep_in_progress (void)
{
        int state = sweep_state;
        return state == SWEEP_STATE_SWEEPING ||
                state == SWEEP_STATE_SWEEPING_AND_ITERATING ||
                state == SWEEP_STATE_COMPACTING;
}

static inline gboolean
block_is_swept_or_marking (MSBlockInfo *block)
{
        gint32 state = block->state;
        return state == BLOCK_STATE_SWEPT || state == BLOCK_STATE_MARKING;
}

//#define MARKSWEEP_CONSISTENCY_CHECK

#ifdef MARKSWEEP_CONSISTENCY_CHECK
static void
check_block_free_list (MSBlockInfo *block, int size, gboolean pinned)
{
        SGEN_ASSERT (0, !sweep_in_progress (), "Can't examine allocated blocks during sweep");
        for (; block; block = block->next_free) {
                SGEN_ASSERT (0, block->state != BLOCK_STATE_CHECKING, "Can't have a block we're checking in a free list.");
                g_assert (block->obj_size == size);
                g_assert ((pinned && block->pinned) || (!pinned && !block->pinned));

                /* blocks in the free lists must have at least
                   one free slot */
                g_assert (block->free_list);

                /* the block must be in the allocated_blocks array */
                g_assert (sgen_array_list_find (&allocated_blocks, BLOCK_TAG (block)) != (guint32)-1);
        }
}

static void
check_empty_blocks (void)
{
        void *p;
        size_t i = 0;
        for (p = empty_blocks; p; p = *(void**)p)
                ++i;
        g_assert (i == num_empty_blocks);
}

static void
consistency_check (void)
{
        MSBlockInfo *block;
        int i;

        /* check all blocks */
        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int num_free = 0;
                void **free;

                /* count number of free slots */
                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
                        if (!MS_OBJ_ALLOCED (obj, block))
                                ++num_free;
                }

                /* check free list */
                for (free = block->free_list; free; free = (void**)*free) {
                        g_assert (MS_BLOCK_FOR_OBJ (free) == block);
                        --num_free;
                }
                g_assert (num_free == 0);

                /* check all mark words are zero */
                if (!sgen_concurrent_collection_in_progress () && block_is_swept_or_marking (block)) {
                        for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
                                g_assert (block->mark_words [i] == 0);
                }
        } END_FOREACH_BLOCK_NO_LOCK;

        /* check free blocks */
        for (i = 0; i < num_block_obj_sizes; ++i) {
                int j;
                for (j = 0; j < MS_BLOCK_TYPE_MAX; ++j)
                        check_block_free_list (free_block_lists [j][i], block_obj_sizes [i], j & MS_BLOCK_FLAG_PINNED);
        }

        check_empty_blocks ();
}
#endif

static void
add_free_block (MSBlockInfo * volatile *free_blocks, int size_index, MSBlockInfo *block)
{
        MSBlockInfo *old;
        do {
                block->next_free = old = free_blocks [size_index];
        } while (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], block, old) != old);
}

static void major_finish_sweep_checking (void);

static gboolean
ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
{
        int size = block_obj_sizes [size_index];
        int count = MS_BLOCK_FREE / size;
        MSBlockInfo *info;
        MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (pinned, has_references);
        char *obj_start;
        int i;

        if (!sgen_memgov_try_alloc_space (ms_block_size, SPACE_MAJOR))
                return FALSE;

        info = (MSBlockInfo*)ms_get_empty_block ();

        SGEN_ASSERT (9, count >= 2, "block with %d objects, it must hold at least 2", count);

        info->obj_size = size;
        info->obj_size_index = size_index;
        info->pinned = pinned;
        info->has_references = has_references;
        info->has_pinned = pinned;
        /*
         * Blocks that are to-space are not evacuated from.  During a major collection
         * blocks are allocated for two reasons: evacuating objects from the nursery and
         * evacuating them from major blocks marked for evacuation.  In both cases we don't
         * want further evacuation. We also don't want to evacuate objects allocated during
         * the concurrent mark since it would add pointless stress on the finishing pause.
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD) || sgen_concurrent_collection_in_progress ();
        info->state = info->is_to_space ? BLOCK_STATE_MARKING : BLOCK_STATE_SWEPT;
        SGEN_ASSERT (6, !sweep_in_progress () || info->state == BLOCK_STATE_SWEPT, "How do we add a new block to be swept while sweeping?");
        info->cardtable_mod_union = NULL;

        update_heap_boundaries_for_block (info);

        binary_protocol_block_alloc (info, ms_block_size);

        /* build free list */
        obj_start = MS_BLOCK_FOR_BLOCK_INFO (info) + MS_BLOCK_SKIP;
        info->free_list = (void**)obj_start;
        /* we're skipping the last one - it must be nulled */
        for (i = 0; i < count - 1; ++i) {
                char *next_obj_start = obj_start + size;
                *(void**)obj_start = next_obj_start;
                obj_start = next_obj_start;
        }
        /* the last one */
        *(void**)obj_start = NULL;

        add_free_block (free_blocks, size_index, info);

        sgen_array_list_add (&allocated_blocks, BLOCK_TAG (info), 0, FALSE);

        SGEN_ATOMIC_ADD_P (num_major_sections, 1);
        return TRUE;
}

static gboolean
ptr_is_in_major_block (char *ptr, char **start, gboolean *pinned)
{
        MSBlockInfo *block;

        FOREACH_BLOCK_NO_LOCK (block) {
                if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) {
                        int count = MS_BLOCK_FREE / block->obj_size;
                        int i;

                        if (start)
                                *start = NULL;
                        for (i = 0; i <= count; ++i) {
                                if (ptr >= (char*)MS_BLOCK_OBJ (block, i) && ptr < (char*)MS_BLOCK_OBJ (block, i + 1)) {
                                        if (start)
                                                *start = (char *)MS_BLOCK_OBJ (block, i);
                                        break;
                                }
                        }
                        if (pinned)
                                *pinned = block->pinned;
                        return TRUE;
                }
        } END_FOREACH_BLOCK_NO_LOCK;
        return FALSE;
}

static gboolean
ptr_is_from_pinned_alloc (char *ptr)
{
        gboolean pinned;
        if (ptr_is_in_major_block (ptr, NULL, &pinned))
                return pinned;
        return FALSE;
}

static void
ensure_can_access_block_free_list (MSBlockInfo *block)
{
 retry:
        for (;;) {
                switch (block->state) {
                case BLOCK_STATE_SWEPT:
                case BLOCK_STATE_MARKING:
                        return;
                case BLOCK_STATE_CHECKING:
                        SGEN_ASSERT (0, FALSE, "How did we get a block that's being checked from a free list?");
                        break;
                case BLOCK_STATE_NEED_SWEEPING:
                        if (sweep_block (block))
                                ++stat_major_blocks_lazy_swept;
                        break;
                case BLOCK_STATE_SWEEPING:
                        /* FIXME: do this more elegantly */
                        g_usleep (100);
                        goto retry;
                default:
                        SGEN_ASSERT (0, FALSE, "Illegal block state");
                        break;
                }
        }
}

static void*
unlink_slot_from_free_list_uncontested (MSBlockInfo * volatile *free_blocks, int size_index)
{
        MSBlockInfo *block, *next_free_block;
        void *obj, *next_free_slot;

 retry:
        block = free_blocks [size_index];
        SGEN_ASSERT (9, block, "no free block to unlink from free_blocks %p size_index %d", free_blocks, size_index);

        ensure_can_access_block_free_list (block);

        obj = block->free_list;
        SGEN_ASSERT (6, obj, "block %p in free list had no available object to alloc from", block);

        next_free_slot = *(void**)obj;
        if (next_free_slot) {
                block->free_list = (gpointer *)next_free_slot;
                return obj;
        }

        next_free_block = block->next_free;
        if (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], next_free_block, block) != block)
                goto retry;

        block->free_list = NULL;
        block->next_free = NULL;

        return obj;
}

static GCObject*
alloc_obj (GCVTable vtable, size_t size, gboolean pinned, gboolean has_references)
{
        int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
        MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (pinned, has_references);
        void *obj;

        if (!free_blocks [size_index]) {
                if (G_UNLIKELY (!ms_alloc_block (size_index, pinned, has_references)))
                        return NULL;
        }

        obj = unlink_slot_from_free_list_uncontested (free_blocks, size_index);

        /* FIXME: assumes object layout */
        *(GCVTable*)obj = vtable;

        total_allocated_major += block_obj_sizes [size_index];

        return (GCObject *)obj;
}

static GCObject*
major_alloc_object (GCVTable vtable, size_t size, gboolean has_references)
{
        return alloc_obj (vtable, size, FALSE, has_references);
}

/*
 * This can only be called by sgen workers. While this is called we assume
 * that no other thread is accessing the block free lists. The world should
 * be stopped and the gc thread should be waiting for workers to finish.
 */
static GCObject*
major_alloc_object_par (GCVTable vtable, size_t size, gboolean has_references)
{
        int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
        MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (FALSE, has_references);
        MSBlockInfo **free_blocks_local = FREE_BLOCKS_LOCAL (FALSE, has_references);
        void *obj;

        if (free_blocks_local [size_index]) {
get_slot:
                obj = unlink_slot_from_free_list_uncontested (free_blocks_local, size_index);
        } else {
                MSBlockInfo *block;
get_block:
                block = free_blocks [size_index];
                if (!block) {
                        if (G_UNLIKELY (!ms_alloc_block (size_index, FALSE, has_references)))
                                return NULL;
                        goto get_block;
                } else {
                        MSBlockInfo *next_free = block->next_free;
                        /*
                         * Once a block is removed from the main list, it cannot return to the list until
                         * all the workers are finished and sweep is starting. This means we don't need
                         * to account for ABA problems.
                         */
                        if (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], next_free, block) != block)
                                goto get_block;
                        block->next_free = free_blocks_local [size_index];
                        free_blocks_local [size_index] = block;

                        goto get_slot;
                }
        }

        /* FIXME: assumes object layout */
        *(GCVTable*)obj = vtable;

        /* FIXME is it worth CAS-ing here */
        total_allocated_major += block_obj_sizes [size_index];

        return (GCObject *)obj;
}

/*
 * We're not freeing the block if it's empty.  We leave that work for
 * the next major collection.
 *
 * This is just called from the domain clearing code, which runs in a
 * single thread and has the GC lock, so we don't need an extra lock.
 */
static void
free_object (GCObject *obj, size_t size, gboolean pinned)
{
        MSBlockInfo *block = MS_BLOCK_FOR_OBJ (obj);
        int word, bit;
        gboolean in_free_list;

        SGEN_ASSERT (9, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");

        ensure_can_access_block_free_list (block);
        SGEN_ASSERT (9, (pinned && block->pinned) || (!pinned && !block->pinned), "free-object pinning mixup object %p pinned %d block %p pinned %d", obj, pinned, block, block->pinned);
        SGEN_ASSERT (9, MS_OBJ_ALLOCED (obj, block), "object %p is already free", obj);
        MS_CALC_MARK_BIT (word, bit, obj);
        SGEN_ASSERT (9, !MS_MARK_BIT (block, word, bit), "object %p has mark bit set", obj);

        memset (obj, 0, size);

        in_free_list = !!block->free_list;
        *(void**)obj = block->free_list;
        block->free_list = (void**)obj;

        if (!in_free_list) {
                MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (pinned, block->has_references);
                int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
                SGEN_ASSERT (9, !block->next_free, "block %p doesn't have a free-list of objects but belongs to a free-list of blocks", block);
                add_free_block (free_blocks, size_index, block);
        }
}

static void
major_free_non_pinned_object (GCObject *obj, size_t size)
{
        free_object (obj, size, FALSE);
}

/* size is a multiple of SGEN_ALLOC_ALIGN */
static GCObject*
major_alloc_small_pinned_obj (GCVTable vtable, size_t size, gboolean has_references)
{
        void *res;

        res = alloc_obj (vtable, size, TRUE, has_references);
        /*
         * If we failed to alloc memory, we'd better try releasing memory, as
         * pinned alloc is requested by the runtime.
         */
        if (!res) {
                sgen_perform_collection (0, GENERATION_OLD, "pinned alloc failure", TRUE, TRUE);
                res = alloc_obj (vtable, size, TRUE, has_references);
        }
        return (GCObject *)res;
}

static void
free_pinned_object (GCObject *obj, size_t size)
{
        free_object (obj, size, TRUE);
}

/*
 * size is already rounded up and we hold the GC lock.
 */
static GCObject*
major_alloc_degraded (GCVTable vtable, size_t size)
{
        GCObject *obj;

        obj = alloc_obj (vtable, size, FALSE, SGEN_VTABLE_HAS_REFERENCES (vtable));
        if (G_LIKELY (obj)) {
                HEAVY_STAT (++stat_objects_alloced_degraded);
                HEAVY_STAT (stat_bytes_alloced_degraded += size);
        }
        return obj;
}

/*
 * obj is some object.  If it's not in the major heap (i.e. if it's in
 * the nursery or LOS), return FALSE.  Otherwise return whether it's
 * been marked or copied.
 */
static gboolean
major_is_object_live (GCObject *obj)
{
        MSBlockInfo *block;
        int word, bit;
        mword objsize;

        if (sgen_ptr_in_nursery (obj))
                return FALSE;

        objsize = SGEN_ALIGN_UP (sgen_safe_object_get_size (obj));

        /* LOS */
        if (objsize > SGEN_MAX_SMALL_OBJ_SIZE)
                return FALSE;

        /* now we know it's in a major block */
        block = MS_BLOCK_FOR_OBJ (obj);
        SGEN_ASSERT (9, !block->pinned, "block %p is pinned, BTW why is this bad?", block);
        MS_CALC_MARK_BIT (word, bit, obj);
        return MS_MARK_BIT (block, word, bit) ? TRUE : FALSE;
}

static gboolean
major_ptr_is_in_non_pinned_space (char *ptr, char **start)
{
        gboolean pinned;
        if (ptr_is_in_major_block (ptr, start, &pinned))
                return !pinned;
        return FALSE;
}

static gboolean
try_set_sweep_state (int new_, int expected)
{
        int old = SGEN_CAS (&sweep_state, new_, expected);
        return old == expected;
}

static void
set_sweep_state (int new_, int expected)
{
        gboolean success = try_set_sweep_state (new_, expected);
        SGEN_ASSERT (0, success, "Could not set sweep state.");
}

static gboolean ensure_block_is_checked_for_sweeping (guint32 block_index, gboolean wait, gboolean *have_checked);

static SgenThreadPoolJob * volatile sweep_job;
static SgenThreadPoolJob * volatile sweep_blocks_job;

static void
major_finish_sweep_checking (void)
{
        guint32 block_index;
        SgenThreadPoolJob *job;

 retry:
        switch (sweep_state) {
        case SWEEP_STATE_SWEPT:
        case SWEEP_STATE_NEED_SWEEPING:
                return;
        case SWEEP_STATE_SWEEPING:
                if (try_set_sweep_state (SWEEP_STATE_SWEEPING_AND_ITERATING, SWEEP_STATE_SWEEPING))
                        break;
                goto retry;
        case SWEEP_STATE_SWEEPING_AND_ITERATING:
                SGEN_ASSERT (0, FALSE, "Is there another minor collection running?");
                goto retry;
        case SWEEP_STATE_COMPACTING:
                goto wait;
        default:
                SGEN_ASSERT (0, FALSE, "Invalid sweep state.");
                break;
        }

        /*
         * We're running with the world stopped and the only other thread doing work is the
         * sweep thread, which doesn't add blocks to the array, so we can safely access
         * `next_slot`.
         */
        for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index)
                ensure_block_is_checked_for_sweeping (block_index, FALSE, NULL);

        set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_SWEEPING_AND_ITERATING);

 wait:
        job = sweep_job;
        if (job)
                sgen_thread_pool_job_wait (sweep_pool_context, job);
        SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?");
        SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?");
}

static void
major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc callback, void *data)
{
        gboolean sweep = flags & ITERATE_OBJECTS_SWEEP;
        gboolean non_pinned = flags & ITERATE_OBJECTS_NON_PINNED;
        gboolean pinned = flags & ITERATE_OBJECTS_PINNED;
        MSBlockInfo *block;

        /* No actual sweeping will take place if we are in the middle of a major collection. */
        major_finish_sweep_checking ();
        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;

                if (block->pinned && !pinned)
                        continue;
                if (!block->pinned && !non_pinned)
                        continue;
                if (sweep && lazy_sweep && !block_is_swept_or_marking (block)) {
                        sweep_block (block);
                        SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
                }

                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
                        if (MS_OBJ_ALLOCED (obj, block))
                                callback ((GCObject*)obj, block->obj_size, data);
                }
        } END_FOREACH_BLOCK_NO_LOCK;
}

static gboolean
major_is_valid_object (char *object)
{
        MSBlockInfo *block;

        FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;

                if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= object))
                        continue;

                idx = MS_BLOCK_OBJ_INDEX (object, block);
                obj = (char*)MS_BLOCK_OBJ (block, idx);
                if (obj != object)
                        return FALSE;
                return MS_OBJ_ALLOCED (obj, block);
        } END_FOREACH_BLOCK_NO_LOCK;

        return FALSE;
}


static GCVTable
major_describe_pointer (char *ptr)
{
        MSBlockInfo *block;

        FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;
                gboolean live;
                GCVTable vtable;
                int w, b;
                gboolean marked;

                if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= ptr))
                        continue;

                SGEN_LOG (0, "major-ptr (block %p sz %d pin %d ref %d)\n",
                        MS_BLOCK_FOR_BLOCK_INFO (block), block->obj_size, block->pinned, block->has_references);

                idx = MS_BLOCK_OBJ_INDEX (ptr, block);
                obj = (char*)MS_BLOCK_OBJ (block, idx);
                live = MS_OBJ_ALLOCED (obj, block);
                vtable = live ? SGEN_LOAD_VTABLE ((GCObject*)obj) : NULL;

                MS_CALC_MARK_BIT (w, b, obj);
                marked = MS_MARK_BIT (block, w, b);

                if (obj == ptr) {
                        SGEN_LOG (0, "\t(");
                        if (live)
                                SGEN_LOG (0, "object");
                        else
                                SGEN_LOG (0, "dead-object");
                } else {
                        if (live)
                                SGEN_LOG (0, "interior-ptr offset %zd", ptr - obj);
                        else
                                SGEN_LOG (0, "dead-interior-ptr offset %zd", ptr - obj);
                }

                SGEN_LOG (0, " marked %d)\n", marked ? 1 : 0);

                return vtable;
        } END_FOREACH_BLOCK_NO_LOCK;

        return NULL;
}

static void
major_check_scan_starts (void)
{
}

static void
major_dump_heap (FILE *heap_dump_file)
{
        MSBlockInfo *block;
        int *slots_available = (int *)alloca (sizeof (int) * num_block_obj_sizes);
        int *slots_used = (int *)alloca (sizeof (int) * num_block_obj_sizes);
        int i;

        for (i = 0; i < num_block_obj_sizes; ++i)
                slots_available [i] = slots_used [i] = 0;

        FOREACH_BLOCK_NO_LOCK (block) {
                int index = ms_find_block_obj_size_index (block->obj_size);
                int count = MS_BLOCK_FREE / block->obj_size;

                slots_available [index] += count;
                for (i = 0; i < count; ++i) {
                        if (MS_OBJ_ALLOCED (MS_BLOCK_OBJ (block, i), block))
                                ++slots_used [index];
                }
        } END_FOREACH_BLOCK_NO_LOCK;

        fprintf (heap_dump_file, "<occupancies>\n");
        for (i = 0; i < num_block_obj_sizes; ++i) {
                fprintf (heap_dump_file, "<occupancy size=\"%d\" available=\"%d\" used=\"%d\" />\n",
                                block_obj_sizes [i], slots_available [i], slots_used [i]);
        }
        fprintf (heap_dump_file, "</occupancies>\n");

        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
                int start = -1;

                fprintf (heap_dump_file, "<section type=\"%s\" size=\"%zu\">\n", "old", (size_t)MS_BLOCK_FREE);

                for (i = 0; i <= count; ++i) {
                        if ((i < count) && MS_OBJ_ALLOCED (MS_BLOCK_OBJ (block, i), block)) {
                                if (start < 0)
                                        start = i;
                        } else {
                                if (start >= 0) {
                                        sgen_dump_occupied ((char *)MS_BLOCK_OBJ (block, start), (char *)MS_BLOCK_OBJ (block, i), MS_BLOCK_FOR_BLOCK_INFO (block));
                                        start = -1;
                                }
                        }
                }

                fprintf (heap_dump_file, "</section>\n");
        } END_FOREACH_BLOCK_NO_LOCK;
}

static guint8*
get_cardtable_mod_union_for_block (MSBlockInfo *block, gboolean allocate)
{
        guint8 *mod_union = block->cardtable_mod_union;
        guint8 *other;
        if (mod_union)
                return mod_union;
        else if (!allocate)
                return NULL;
        mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
        other = (guint8 *)SGEN_CAS_PTR ((gpointer*)&block->cardtable_mod_union, mod_union, NULL);
        if (!other) {
                SGEN_ASSERT (0, block->cardtable_mod_union == mod_union, "Why did CAS not replace?");
                return mod_union;
        }
        sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
        return other;
}

static inline guint8*
major_get_cardtable_mod_union_for_reference (char *ptr)
{
        MSBlockInfo *block = MS_BLOCK_FOR_OBJ (ptr);
        size_t offset = sgen_card_table_get_card_offset (ptr, (char*)sgen_card_table_align_pointer (MS_BLOCK_FOR_BLOCK_INFO (block)));
        guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE);
        SGEN_ASSERT (0, mod_union, "FIXME: optionally allocate the mod union if it's not here and CAS it in.");
        return &mod_union [offset];
}

/*
 * Mark the mod-union card for `ptr`, which must be a reference within the object `obj`.
 */
static void
mark_mod_union_card (GCObject *obj, void **ptr, GCObject *value_obj)
{
        int type = sgen_obj_get_descriptor (obj) & DESC_TYPE_MASK;
        if (sgen_safe_object_is_small (obj, type)) {
                guint8 *card_byte = major_get_cardtable_mod_union_for_reference ((char*)ptr);
                SGEN_ASSERT (0, MS_BLOCK_FOR_OBJ (obj) == MS_BLOCK_FOR_OBJ (ptr), "How can an object and a reference inside it not be in the same block?");
                *card_byte = 1;
        } else {
                sgen_los_mark_mod_union_card (obj, ptr);
        }
        binary_protocol_mod_union_remset (obj, ptr, value_obj, SGEN_LOAD_VTABLE (value_obj));
}

static inline gboolean
major_block_is_evacuating (MSBlockInfo *block)
{
        if (evacuate_block_obj_sizes [block->obj_size_index] &&
                        !block->has_pinned &&
                        !block->is_to_space)
                return TRUE;
        return FALSE;
}

#define MS_MARK_OBJECT_AND_ENQUEUE(obj,desc,block,queue) do {           \
                int __word, __bit;                                      \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
                SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj); \
                if (!MS_MARK_BIT ((block), __word, __bit)) {            \
                        MS_SET_MARK_BIT ((block), __word, __bit);       \
                        if (sgen_gc_descr_has_references (desc))                        \
                                GRAY_OBJECT_ENQUEUE_SERIAL ((queue), (obj), (desc)); \
                        binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)
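
/*
 * The parallel variant below differs chiefly in using MS_SET_MARK_BIT_PAR,
 * so that when several workers race on the same object exactly one of them
 * sees `first` set and enqueues it.
 */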
#define MS_MARK_OBJECT_AND_ENQUEUE_PAR(obj,desc,block,queue) do {       \
                int __word, __bit;                                      \
                gboolean first;                                         \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
                SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj); \
                MS_SET_MARK_BIT_PAR ((block), __word, __bit, first);    \
                if (first) {                                            \
                        if (sgen_gc_descr_has_references (desc))        \
                                GRAY_OBJECT_ENQUEUE_PARALLEL ((queue), (obj), (desc)); \
                        binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)



static void
pin_major_object (GCObject *obj, SgenGrayQueue *queue)
{
        MSBlockInfo *block;

        if (concurrent_mark)
                g_assert_not_reached ();

        block = MS_BLOCK_FOR_OBJ (obj);
        block->has_pinned = TRUE;
        MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
}

#define COPY_OR_MARK_PARALLEL
#include "sgen-major-copy-object.h"

static long long
major_get_and_reset_num_major_objects_marked (void)
{
#ifdef SGEN_COUNT_NUMBER_OF_MAJOR_OBJECTS_MARKED
        long long num = num_major_objects_marked;
        num_major_objects_marked = 0;
        return num;
#else
        return 0;
#endif
}

#define PREFETCH_CARDS          1       /* BOOL FASTENABLE */
#if !PREFETCH_CARDS
#undef PREFETCH_CARDS
#endif

/* gcc 4.2.1 from xcode4 crashes on sgen_card_table_get_card_address () when this is enabled */
#if defined(PLATFORM_MACOSX)
#if MONO_GNUC_VERSION <= 40300
#undef PREFETCH_CARDS
#endif
#endif

#ifdef HEAVY_STATISTICS
static guint64 stat_optimized_copy;
static guint64 stat_optimized_copy_nursery;
static guint64 stat_optimized_copy_nursery_forwarded;
static guint64 stat_optimized_copy_nursery_pinned;
static guint64 stat_optimized_copy_major;
static guint64 stat_optimized_copy_major_small_fast;
static guint64 stat_optimized_copy_major_small_slow;
static guint64 stat_optimized_copy_major_large;
static guint64 stat_optimized_copy_major_forwarded;
static guint64 stat_optimized_copy_major_small_evacuate;
static guint64 stat_optimized_major_scan;
static guint64 stat_optimized_major_scan_no_refs;

static guint64 stat_drain_prefetch_fills;
static guint64 stat_drain_prefetch_fill_failures;
static guint64 stat_drain_loops;
#endif
1247
1248 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_no_evacuation
1249 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_no_evacuation
1250 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_no_evacuation
1251 #include "sgen-marksweep-drain-gray-stack.h"
1252
1253 #define COPY_OR_MARK_PARALLEL
1254 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_par_no_evacuation
1255 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_par_no_evacuation
1256 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_par_no_evacuation
1257 #include "sgen-marksweep-drain-gray-stack.h"
1258
1259 #define COPY_OR_MARK_WITH_EVACUATION
1260 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_with_evacuation
1261 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_with_evacuation
1262 #define SCAN_VTYPE_FUNCTION_NAME        major_scan_vtype_with_evacuation
1263 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_with_evacuation
1264 #define SCAN_PTR_FIELD_FUNCTION_NAME    major_scan_ptr_field_with_evacuation
1265 #include "sgen-marksweep-drain-gray-stack.h"
1266
1267 #define COPY_OR_MARK_PARALLEL
1268 #define COPY_OR_MARK_WITH_EVACUATION
1269 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_par_with_evacuation
1270 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_par_with_evacuation
1271 #define SCAN_VTYPE_FUNCTION_NAME        major_scan_vtype_par_with_evacuation
1272 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_par_with_evacuation
1273 #define SCAN_PTR_FIELD_FUNCTION_NAME    major_scan_ptr_field_par_with_evacuation
1274 #include "sgen-marksweep-drain-gray-stack.h"
1275
1276 #define COPY_OR_MARK_CONCURRENT
1277 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_concurrent_no_evacuation
1278 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_concurrent_no_evacuation
1279 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_concurrent_no_evacuation
1280 #include "sgen-marksweep-drain-gray-stack.h"
1281
1282 #define COPY_OR_MARK_PARALLEL
1283 #define COPY_OR_MARK_CONCURRENT
1284 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_concurrent_par_no_evacuation
1285 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_concurrent_par_no_evacuation
1286 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_concurrent_par_no_evacuation
1287 #include "sgen-marksweep-drain-gray-stack.h"
1288
1289 #define COPY_OR_MARK_CONCURRENT_WITH_EVACUATION
1290 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_concurrent_with_evacuation
1291 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_concurrent_with_evacuation
1292 #define SCAN_VTYPE_FUNCTION_NAME        major_scan_vtype_concurrent_with_evacuation
1293 #define SCAN_PTR_FIELD_FUNCTION_NAME    major_scan_ptr_field_concurrent_with_evacuation
1294 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_concurrent_with_evacuation
1295 #include "sgen-marksweep-drain-gray-stack.h"
1296
1297 #define COPY_OR_MARK_PARALLEL
1298 #define COPY_OR_MARK_CONCURRENT_WITH_EVACUATION
1299 #define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_concurrent_par_with_evacuation
1300 #define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_concurrent_par_with_evacuation
1301 #define SCAN_VTYPE_FUNCTION_NAME        major_scan_vtype_concurrent_par_with_evacuation
1302 #define SCAN_PTR_FIELD_FUNCTION_NAME    major_scan_ptr_field_concurrent_par_with_evacuation
1303 #define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_concurrent_par_with_evacuation
1304 #include "sgen-marksweep-drain-gray-stack.h"
1305
1306 static inline gboolean
1307 major_is_evacuating (void)
1308 {
1309         int i;
1310         for (i = 0; i < num_block_obj_sizes; ++i) {
1311                 if (evacuate_block_obj_sizes [i]) {
1312                         return TRUE;
1313                 }
1314         }
1315
1316         return FALSE;
1317 }
1318
static gboolean
drain_gray_stack (SgenGrayQueue *queue)
{
	if (major_is_evacuating ())
		return drain_gray_stack_with_evacuation (queue);
	else
		return drain_gray_stack_no_evacuation (queue);
}

static gboolean
drain_gray_stack_par (SgenGrayQueue *queue)
{
	if (major_is_evacuating ())
		return drain_gray_stack_par_with_evacuation (queue);
	else
		return drain_gray_stack_par_no_evacuation (queue);
}

static gboolean
drain_gray_stack_concurrent (SgenGrayQueue *queue)
{
	if (major_is_evacuating ())
		return drain_gray_stack_concurrent_with_evacuation (queue);
	else
		return drain_gray_stack_concurrent_no_evacuation (queue);
}

static gboolean
drain_gray_stack_concurrent_par (SgenGrayQueue *queue)
{
	if (major_is_evacuating ())
		return drain_gray_stack_concurrent_par_with_evacuation (queue);
	else
		return drain_gray_stack_concurrent_par_no_evacuation (queue);
}

static void
major_copy_or_mark_object_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
	major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
}

static void
major_copy_or_mark_object_concurrent_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
	major_copy_or_mark_object_concurrent_with_evacuation (ptr, *ptr, queue);
}

static void
major_copy_or_mark_object_concurrent_par_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
	major_copy_or_mark_object_concurrent_par_with_evacuation (ptr, *ptr, queue);
}

static void
major_copy_or_mark_object_concurrent_finish_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
	major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
}

static void
major_copy_or_mark_object_concurrent_par_finish_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
	major_copy_or_mark_object_par_with_evacuation (ptr, *ptr, queue);
}

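/*
 * Mark every pinned object in the pin queue range [first_entry, last_entry).
 * The entries are ascending addresses, possibly interior pointers, so
 * several consecutive entries can resolve to the same slot index;
 * `last_index` deduplicates them, and only slots that are actually
 * allocated get marked and enqueued.
 */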
static void
mark_pinned_objects_in_block (MSBlockInfo *block, size_t first_entry, size_t last_entry, SgenGrayQueue *queue)
{
	void **entry, **end;
	int last_index = -1;

	if (first_entry == last_entry)
		return;

	entry = sgen_pinning_get_entry (first_entry);
	end = sgen_pinning_get_entry (last_entry);

	for (; entry < end; ++entry) {
		int index = MS_BLOCK_OBJ_INDEX (*entry, block);
		GCObject *obj;
		SGEN_ASSERT (9, index >= 0 && index < MS_BLOCK_FREE / block->obj_size, "invalid object %p index %d max-index %d", *entry, index, (int)(MS_BLOCK_FREE / block->obj_size));
		if (index == last_index)
			continue;
		obj = MS_BLOCK_OBJ (block, index);
		if (!MS_OBJ_ALLOCED (obj, block))
			continue;
		MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
		sgen_pin_stats_register_object (obj, GENERATION_OLD);
		last_index = index;
	}

	/*
	 * There might have been potential pinning "pointers" into this block, but if none
	 * of them pointed to an occupied slot, nothing was marked and we don't have to
	 * pin the block.
	 */
	if (last_index >= 0)
		block->has_pinned = TRUE;
}

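/*
 * Sweep one block for a (possibly compile-time constant) object size:
 * marked slots are left alone, unmarked-but-allocated slots are reported
 * and zeroed, and every unmarked slot is pushed onto the head of
 * block->free_list.  The list therefore ends up in reverse address order;
 * sweep_block () reverses it afterwards.
 */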
static inline void
sweep_block_for_size (MSBlockInfo *block, int count, int obj_size)
{
	int obj_index;
	void *obj = MS_BLOCK_OBJ_FOR_SIZE (block, 0, obj_size);

	for (obj_index = 0; obj_index < count; ++obj_index, obj = (void*)((mword)obj + obj_size)) {
		int word, bit;

		MS_CALC_MARK_BIT (word, bit, obj);
		if (MS_MARK_BIT (block, word, bit)) {
			SGEN_ASSERT (9, MS_OBJ_ALLOCED (obj, block), "object %p not allocated", obj);
		} else {
			/* an unmarked object */
			if (MS_OBJ_ALLOCED (obj, block)) {
				/*
				 * FIXME: Merge consecutive slots for lower reporting
				 * overhead.  Maybe memset will also benefit?
				 */
				binary_protocol_empty (obj, obj_size);
				memset (obj, 0, obj_size);
			}
			*(void**)obj = block->free_list;
			block->free_list = (void **)obj;
		}
	}
}

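/*
 * Block state transitions are lock-free: try_set_block_state () CASes the
 * state and reports the transition to the binary protocol only on success,
 * so concurrent observers (e.g. the sweep thread vs. a nursery collection)
 * can safely race on the same block.  set_block_state () is the non-racy
 * variant for callers that already own the block.
 */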
static inline gboolean
try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
{
	gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state);
	gboolean success = old_state == expected_state;
	if (success)
		binary_protocol_block_set_state (block, ms_block_size, old_state, new_state);
	return success;
}

static inline void
set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
{
	SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
	block->state = new_state;
	binary_protocol_block_set_state (block, ms_block_size, expected_state, new_state);
}

/*
 * If `block` needs sweeping, sweep it and return TRUE.  Otherwise return FALSE.
 *
 * Sweeping means iterating through the block's slots and building the free-list from the
 * unmarked ones.  They will also be zeroed.  The mark bits will be reset.
 */
static gboolean
sweep_block (MSBlockInfo *block)
{
	int count;
	void *reversed = NULL;

 retry:
	switch (block->state) {
	case BLOCK_STATE_SWEPT:
		return FALSE;
	case BLOCK_STATE_MARKING:
	case BLOCK_STATE_CHECKING:
		SGEN_ASSERT (0, FALSE, "How did we get to sweep a block that's being marked or being checked?");
		goto retry;
	case BLOCK_STATE_SWEEPING:
		/* FIXME: Do this more elegantly */
		g_usleep (100);
		goto retry;
	case BLOCK_STATE_NEED_SWEEPING:
		if (!try_set_block_state (block, BLOCK_STATE_SWEEPING, BLOCK_STATE_NEED_SWEEPING))
			goto retry;
		break;
	default:
		SGEN_ASSERT (0, FALSE, "Illegal block state");
	}

	SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");

	count = MS_BLOCK_FREE / block->obj_size;

	block->free_list = NULL;

	/* Use inline instances specialized to constant sizes; this allows the compiler to replace the memset calls with inline code */
	// FIXME: Add more sizes
	switch (block->obj_size) {
	case 16:
		sweep_block_for_size (block, count, 16);
		break;
	default:
		sweep_block_for_size (block, count, block->obj_size);
		break;
	}

	/* reset mark bits */
	memset (block->mark_words, 0, sizeof (guint32) * MS_NUM_MARK_WORDS);

	/* Reverse free list so that it's in address order */
	reversed = NULL;
	while (block->free_list) {
		void *next = *(void**)block->free_list;
		*(void**)block->free_list = reversed;
		reversed = block->free_list;
		block->free_list = (void **)next;
	}
	block->free_list = (void **)reversed;

	mono_memory_write_barrier ();

	set_block_state (block, BLOCK_STATE_SWEPT, BLOCK_STATE_SWEEPING);

	return TRUE;
}

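/*
 * Population count over one mark word.  The non-GCC fallback uses
 * Kernighan's trick: d &= (d - 1) clears the lowest set bit, so the loop
 * runs once per set bit, e.g. 0b1011 -> 0b1010 -> 0b1000 -> 0 counts 3.
 */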
static inline int
bitcount (mword d)
{
	int count = 0;

#ifdef __GNUC__
	if (sizeof (mword) == 8)
		count += __builtin_popcountll (d);
	else
		count += __builtin_popcount (d);
#else
	while (d) {
		count++;
		d &= (d - 1);
	}
#endif
	return count;
}

/* statistics for evacuation */
static size_t *sweep_slots_available;
static size_t *sweep_slots_used;
static size_t *sweep_num_blocks;

static volatile size_t num_major_sections_before_sweep;
static volatile size_t num_major_sections_freed_in_sweep;

static void
sgen_worker_clear_free_block_lists (WorkerData *worker)
{
	int i, j;

	if (!worker->free_block_lists)
		return;

	for (i = 0; i < MS_BLOCK_TYPE_MAX; i++) {
		for (j = 0; j < num_block_obj_sizes; j++) {
			((MSBlockInfo***) worker->free_block_lists) [i][j] = NULL;
		}
	}
}

static void
sgen_worker_clear_free_block_lists_evac (WorkerData *worker)
{
	int i, j;

	if (!worker->free_block_lists)
		return;

	for (i = 0; i < MS_BLOCK_TYPE_MAX; i++) {
		for (j = 0; j < num_block_obj_sizes; j++) {
			if (((MSBlockInfo***) worker->free_block_lists) [i][j])
				SGEN_ASSERT (0, !((MSBlockInfo***) worker->free_block_lists) [i][j]->next_free, "Why do we have linked free blocks on the workers?");

			if (evacuate_block_obj_sizes [j])
				((MSBlockInfo***) worker->free_block_lists) [i][j] = NULL;
		}
	}
}

static void
sweep_start (void)
{
	int i;

	for (i = 0; i < num_block_obj_sizes; ++i)
		sweep_slots_available [i] = sweep_slots_used [i] = sweep_num_blocks [i] = 0;

	/* clear all the free lists */
	for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i) {
		MSBlockInfo * volatile *free_blocks = free_block_lists [i];
		int j;
		for (j = 0; j < num_block_obj_sizes; ++j)
			free_blocks [j] = NULL;
	}

	sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists);
	sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists);

	compact_blocks = TRUE;
}

static void sweep_finish (void);

/*
 * If `wait` is TRUE and the block is currently being checked, this function will wait until
 * the checking has finished.
 *
 * Returns whether the block is still there.  If `wait` is FALSE, the return value will not
 * be correct, i.e. must not be used.
 */
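/*
 * Ownership of a block during checking is taken by tagging its slot in
 * `allocated_blocks` with a CAS, along the lines of:
 *
 *     if (SGEN_CAS_PTR (block_slot, BLOCK_TAG_CHECKING (tagged_block), tagged_block) != tagged_block)
 *             goto retry;
 *
 * Anybody who sees the CHECKING tag either waits or backs off, and the
 * untagged value (or NULL, if the block was freed) is only written back
 * after a write barrier, so the block's final state is visible first.
 */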
static gboolean
ensure_block_is_checked_for_sweeping (guint32 block_index, gboolean wait, gboolean *have_checked)
{
	int count;
	gboolean have_live = FALSE;
	gboolean have_free = FALSE;
	int nused = 0;
	int block_state;
	int i;
	void *tagged_block;
	MSBlockInfo *block;
	volatile gpointer *block_slot = sgen_array_list_get_slot (&allocated_blocks, block_index);

	SGEN_ASSERT (6, sweep_in_progress (), "Why do we call this function if there's no sweep in progress?");

	if (have_checked)
		*have_checked = FALSE;

 retry:
	tagged_block = *(void * volatile *)block_slot;
	if (!tagged_block)
		return FALSE;

	if (BLOCK_IS_TAGGED_CHECKING (tagged_block)) {
		if (!wait)
			return FALSE;
		/* FIXME: do this more elegantly */
		g_usleep (100);
		goto retry;
	}

	if (SGEN_CAS_PTR (block_slot, BLOCK_TAG_CHECKING (tagged_block), tagged_block) != tagged_block)
		goto retry;

	block = BLOCK_UNTAG (tagged_block);
	block_state = block->state;

	if (!sweep_in_progress ()) {
		SGEN_ASSERT (6, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
		if (!lazy_sweep)
			SGEN_ASSERT (6, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
	}

	switch (block_state) {
	case BLOCK_STATE_SWEPT:
	case BLOCK_STATE_NEED_SWEEPING:
	case BLOCK_STATE_SWEEPING:
		goto done;
	case BLOCK_STATE_MARKING:
		break;
	case BLOCK_STATE_CHECKING:
		SGEN_ASSERT (0, FALSE, "We set the CHECKING bit - how can the state be CHECKING?");
		goto done;
	default:
		SGEN_ASSERT (0, FALSE, "Illegal block state");
		break;
	}

	SGEN_ASSERT (6, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
	set_block_state (block, BLOCK_STATE_CHECKING, BLOCK_STATE_MARKING);

	if (have_checked)
		*have_checked = TRUE;

	block->has_pinned = block->pinned;

	block->is_to_space = FALSE;

	count = MS_BLOCK_FREE / block->obj_size;

	if (block->cardtable_mod_union)
		memset (block->cardtable_mod_union, 0, CARDS_PER_BLOCK);

	/* Count marked objects in the block */
	for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
		nused += bitcount (block->mark_words [i]);

	block->nused = nused;
	if (nused)
		have_live = TRUE;
	if (nused < count)
		have_free = TRUE;

	if (have_live) {
		int obj_size_index = block->obj_size_index;
		gboolean has_pinned = block->has_pinned;

		set_block_state (block, BLOCK_STATE_NEED_SWEEPING, BLOCK_STATE_CHECKING);

		/*
		 * FIXME: Go straight to SWEPT if there are no free slots.  We need
		 * to set the free slot list to NULL, though, and maybe update some
		 * statistics.
		 */
		if (!lazy_sweep)
			sweep_block (block);

		if (!has_pinned) {
			++sweep_num_blocks [obj_size_index];
			sweep_slots_used [obj_size_index] += nused;
			sweep_slots_available [obj_size_index] += count;
		}

		/*
		 * If there are free slots in the block, add
		 * the block to the corresponding free list.
		 */
		if (have_free) {
			MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (block->pinned, block->has_references);

			if (!lazy_sweep)
				SGEN_ASSERT (6, block->free_list, "How do we not have a free list when there are free slots?");

			add_free_block (free_blocks, obj_size_index, block);
		}

		/* FIXME: Do we need the heap boundaries while we do nursery collections? */
		update_heap_boundaries_for_block (block);
	} else {
		/*
		 * Blocks without live objects are removed from the
		 * block list and freed.
		 */
		SGEN_ASSERT (6, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
		SGEN_ASSERT (6, *block_slot == BLOCK_TAG_CHECKING (tagged_block), "How did the block move?");

		binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
		ms_free_block (block);

		SGEN_ATOMIC_ADD_P (num_major_sections, -1);
		SGEN_ATOMIC_ADD_P (num_major_sections_freed_in_sweep, 1);

		tagged_block = NULL;
	}

 done:
	/*
	 * Once the block is written back without the checking bit other threads are
	 * free to access it. Make sure the block state is visible before we write it
	 * back.
	 */
	mono_memory_write_barrier ();
	*block_slot = tagged_block;
	return !!tagged_block;
}

static void
sweep_blocks_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
{
	volatile gpointer *slot;
	MSBlockInfo *bl;

	SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {
		bl = BLOCK_UNTAG (*slot);
		if (bl)
			sweep_block (bl);
	} SGEN_ARRAY_LIST_END_FOREACH_SLOT;

	mono_memory_write_barrier ();

	sweep_blocks_job = NULL;
}

static void
sweep_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
{
	guint32 block_index;
	guint32 num_blocks = num_major_sections_before_sweep;

	SGEN_ASSERT (0, sweep_in_progress (), "Sweep thread called with wrong state");
	SGEN_ASSERT (0, num_blocks <= allocated_blocks.next_slot, "How did we lose blocks?");

	/*
	 * We traverse the block array from high to low.  Nursery collections will have to
	 * cooperate with the sweep thread to finish sweeping, and they will traverse from
	 * low to high, to avoid constantly colliding on the same blocks.
	 */
	for (block_index = allocated_blocks.next_slot; block_index-- > 0;) {
		ensure_block_is_checked_for_sweeping (block_index, TRUE, NULL);
	}

	while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING)) {
		/*
		 * The main GC thread is currently iterating over the block array to help us
		 * finish the sweep.  We have already finished, but we don't want to mess up
		 * that iteration, so we just wait for it.
		 */
		g_usleep (100);
	}

	if (SGEN_MAX_ASSERT_LEVEL >= 6) {
		for (block_index = num_blocks; block_index < allocated_blocks.next_slot; ++block_index) {
			MSBlockInfo *block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, block_index));
			SGEN_ASSERT (6, block && block->state == BLOCK_STATE_SWEPT, "How did a new block that needs sweeping get added while we were sweeping?");
		}
	}

	/*
	 * Concurrently sweep all the blocks to reduce workload during minor
	 * pauses where we need certain blocks to be swept. At the start of
	 * the next major we need all blocks to be swept anyway.
	 */
	if (concurrent_sweep && lazy_sweep) {
		sweep_blocks_job = sgen_thread_pool_job_alloc ("sweep_blocks", sweep_blocks_job_func, sizeof (SgenThreadPoolJob));
		sgen_thread_pool_job_enqueue (sweep_pool_context, sweep_blocks_job);
	}

	sweep_finish ();

	sweep_job = NULL;
}

static void
sweep_finish (void)
{
	mword used_slots_size = 0;
	int i;

	for (i = 0; i < num_block_obj_sizes; ++i) {
		float usage = (float)sweep_slots_used [i] / (float)sweep_slots_available [i];
		if (sweep_num_blocks [i] > 5 && usage < evacuation_threshold) {
			evacuate_block_obj_sizes [i] = TRUE;
			/*
			g_print ("slot size %d - %d of %d used\n",
					block_obj_sizes [i], slots_used [i], slots_available [i]);
			*/
		} else {
			evacuate_block_obj_sizes [i] = FALSE;
		}

		used_slots_size += sweep_slots_used [i] * block_obj_sizes [i];
	}

	sgen_memgov_major_post_sweep (used_slots_size);

	set_sweep_state (SWEEP_STATE_SWEPT, SWEEP_STATE_COMPACTING);
	if (concurrent_sweep)
		binary_protocol_concurrent_sweep_end (sgen_timestamp ());
}

static void
major_sweep (void)
{
	set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_NEED_SWEEPING);

	sweep_start ();

	num_major_sections_before_sweep = num_major_sections;
	num_major_sections_freed_in_sweep = 0;

	SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?");
	if (concurrent_sweep) {
		sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob));
		sgen_thread_pool_job_enqueue (sweep_pool_context, sweep_job);
	} else {
		sweep_job_func (NULL, NULL);
	}
}

static gboolean
major_have_swept (void)
{
	return sweep_state == SWEEP_STATE_SWEPT;
}

static int count_pinned_ref;
static int count_pinned_nonref;
static int count_nonpinned_ref;
static int count_nonpinned_nonref;

static void
count_nonpinned_callback (GCObject *obj, size_t size, void *data)
{
	GCVTable vtable = SGEN_LOAD_VTABLE (obj);

	if (SGEN_VTABLE_HAS_REFERENCES (vtable))
		++count_nonpinned_ref;
	else
		++count_nonpinned_nonref;
}

static void
count_pinned_callback (GCObject *obj, size_t size, void *data)
{
	GCVTable vtable = SGEN_LOAD_VTABLE (obj);

	if (SGEN_VTABLE_HAS_REFERENCES (vtable))
		++count_pinned_ref;
	else
		++count_pinned_nonref;
}

static G_GNUC_UNUSED void
count_ref_nonref_objs (void)
{
	int total;

	count_pinned_ref = 0;
	count_pinned_nonref = 0;
	count_nonpinned_ref = 0;
	count_nonpinned_nonref = 0;

	major_iterate_objects (ITERATE_OBJECTS_SWEEP_NON_PINNED, count_nonpinned_callback, NULL);
	major_iterate_objects (ITERATE_OBJECTS_SWEEP_PINNED, count_pinned_callback, NULL);

	total = count_pinned_nonref + count_nonpinned_nonref + count_pinned_ref + count_nonpinned_ref;

	g_print ("ref: %d pinned %d non-pinned   non-ref: %d pinned %d non-pinned  --  %.1f\n",
			count_pinned_ref, count_nonpinned_ref,
			count_pinned_nonref, count_nonpinned_nonref,
			(count_pinned_nonref + count_nonpinned_nonref) * 100.0 / total);
}

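/*
 * As a worked example (assuming a 16-byte minimum object size and 8-byte
 * allocation alignment): the arithmetic part of the sequence contributes
 * 16, 24, 32, ..., 64, and each further size is derived from the
 * geometrically growing target by fitting a whole number of slots into
 * MS_BLOCK_FREE and aligning down, until SGEN_MAX_SMALL_OBJ_SIZE is
 * reached.  The function supports being called with arr == NULL just to
 * count the sizes before the array is allocated and filled.
 */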
static int
ms_calculate_block_obj_sizes (double factor, int *arr)
{
	double target_size;
	int num_sizes = 0;
	int last_size = 0;

	/*
	 * Have every possible slot size starting with the minimal
	 * object size up to and including four times that size.  Then
	 * proceed by increasing geometrically with the given factor.
	 */

	for (int size = SGEN_CLIENT_MINIMUM_OBJECT_SIZE; size <= 4 * SGEN_CLIENT_MINIMUM_OBJECT_SIZE; size += SGEN_ALLOC_ALIGN) {
		if (arr)
			arr [num_sizes] = size;
		++num_sizes;
		last_size = size;
	}
	target_size = (double)last_size;

	do {
		int target_count = (int)floor (MS_BLOCK_FREE / target_size);
		int size = MIN ((MS_BLOCK_FREE / target_count) & ~(SGEN_ALLOC_ALIGN - 1), SGEN_MAX_SMALL_OBJ_SIZE);

		if (size != last_size) {
			if (arr)
				arr [num_sizes] = size;
			++num_sizes;
			last_size = size;
		}

		target_size *= factor;
	} while (last_size < SGEN_MAX_SMALL_OBJ_SIZE);

	return num_sizes;
}

/* only valid during minor collections */
static mword old_num_major_sections;

static void
major_start_nursery_collection (void)
{
#ifdef MARKSWEEP_CONSISTENCY_CHECK
	consistency_check ();
#endif

	old_num_major_sections = num_major_sections;

	/* Compact the block list if it hasn't been compacted in a while and nobody is using it */
	if (compact_blocks && !sweep_in_progress () && !sweep_blocks_job && !sgen_concurrent_collection_in_progress ()) {
		/*
		 * We support null elements in the array but do regular compaction to avoid
		 * excessive traversal of the array and to facilitate splitting into well
		 * balanced sections for parallel modes. We compact as soon as possible after
		 * sweep.
		 */
		sgen_array_list_remove_nulls (&allocated_blocks);
		compact_blocks = FALSE;
	}
}

static void
major_finish_nursery_collection (void)
{
#ifdef MARKSWEEP_CONSISTENCY_CHECK
	consistency_check ();
#endif
}

static int
block_usage_comparer (const void *bl1, const void *bl2)
{
	const gint16 nused1 = (*(MSBlockInfo**)bl1)->nused;
	const gint16 nused2 = (*(MSBlockInfo**)bl2)->nused;

	return nused2 - nused1;
}

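/*
 * Rough picture of the freelist compaction below: if the blocks of this
 * size class hold num_used = 1000 live slots in total and each block has
 * count = 500 slots, at most ceil (1000 / 500) = 2 blocks can ever be
 * needed as evacuation targets, so the 2 fullest blocks are kept on the
 * freelist and flagged is_to_space; the emptier ones are left to drain.
 */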
static void
sgen_evacuation_freelist_blocks (MSBlockInfo * volatile *block_list, int size_index)
{
	MSBlockInfo **evacuated_blocks;
	size_t index = 0, count, num_blocks = 0, num_used = 0;
	MSBlockInfo *info;
	MSBlockInfo * volatile *prev;

	for (info = *block_list; info != NULL; info = info->next_free) {
		num_blocks++;
		num_used += info->nused;
	}

	/*
	 * We have a set of blocks in the freelist which will be evacuated. Instead
	 * of evacuating all of the blocks into new ones, we traverse the freelist,
	 * sorting it by the number of occupied slots, and evacuate the objects from
	 * blocks with fewer used slots into fuller blocks.
	 *
	 * The number of used slots is set at the end of the previous sweep. Since
	 * we sequentially unlink slots from blocks, except for the head of the
	 * freelist, the number of used slots for a block on the freelist is the
	 * same as at the end of the previous sweep.
	 */
	evacuated_blocks = (MSBlockInfo**)sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_blocks, INTERNAL_MEM_TEMPORARY, TRUE);

	for (info = *block_list; info != NULL; info = info->next_free) {
		evacuated_blocks [index++] = info;
	}

	SGEN_ASSERT (0, num_blocks == index, "Why did the freelist change?");

	sgen_qsort (evacuated_blocks, num_blocks, sizeof (gpointer), block_usage_comparer);

	/*
	 * Form a new freelist with the fullest blocks. These blocks will also be
	 * marked as to_space so we don't evacuate from them.
	 */
	count = MS_BLOCK_FREE / block_obj_sizes [size_index];
	prev = block_list;
	for (index = 0; index < (num_used + count - 1) / count; index++) {
		SGEN_ASSERT (0, index < num_blocks, "Why do we need more blocks for compaction than we already had?");
		info = evacuated_blocks [index];
		info->is_to_space = TRUE;
		*prev = info;
		prev = &info->next_free;
	}
	*prev = NULL;

	sgen_free_internal_dynamic (evacuated_blocks, sizeof (MSBlockInfo*) * num_blocks, INTERNAL_MEM_TEMPORARY);
}

static void
major_start_major_collection (void)
{
	MSBlockInfo *block;
	int i;

	major_finish_sweep_checking ();

	/*
	 * Clear the free lists for block sizes where we do evacuation.  For those block
	 * sizes we will have to allocate new blocks.
	 */
	for (i = 0; i < num_block_obj_sizes; ++i) {
		if (!evacuate_block_obj_sizes [i])
			continue;

		binary_protocol_evacuating_blocks (block_obj_sizes [i]);

		sgen_evacuation_freelist_blocks (&free_block_lists [0][i], i);
		sgen_evacuation_freelist_blocks (&free_block_lists [MS_BLOCK_FLAG_REFS][i], i);
	}

	/* We expect workers to have very few blocks on the freelist, just evacuate them */
	sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists_evac);
	sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists_evac);

	if (lazy_sweep && concurrent_sweep) {
		/*
		 * sweep_blocks_job is created before sweep_finish, which we wait for above
		 * (major_finish_sweep_checking). After the end of sweep, if we don't have
		 * sweep_blocks_job set, it means that it has already been run.
		 */
		SgenThreadPoolJob *job = sweep_blocks_job;
		if (job)
			sgen_thread_pool_job_wait (sweep_pool_context, job);
	}

	if (lazy_sweep && !concurrent_sweep)
		binary_protocol_sweep_begin (GENERATION_OLD, TRUE);
	/* Sweep all unswept blocks and set them to MARKING */
	FOREACH_BLOCK_NO_LOCK (block) {
		if (lazy_sweep && !concurrent_sweep)
			sweep_block (block);
		SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "All blocks must be swept when we're pinning.");
		set_block_state (block, BLOCK_STATE_MARKING, BLOCK_STATE_SWEPT);
		/*
		 * Swept blocks that have a null free_list are full. Evacuation is not
		 * effective on these blocks since we expect them to have high usage anyway,
		 * given that the survival rate for majors is relatively high.
		 */
		if (evacuate_block_obj_sizes [block->obj_size_index] && !block->free_list)
			block->is_to_space = TRUE;
	} END_FOREACH_BLOCK_NO_LOCK;
	if (lazy_sweep && !concurrent_sweep)
		binary_protocol_sweep_end (GENERATION_OLD, TRUE);

	set_sweep_state (SWEEP_STATE_NEED_SWEEPING, SWEEP_STATE_SWEPT);
}

static void
major_finish_major_collection (ScannedObjectCounts *counts)
{
#ifdef SGEN_HEAVY_BINARY_PROTOCOL
	if (binary_protocol_is_enabled ()) {
		counts->num_scanned_objects = scanned_objects_list.next_slot;

		sgen_pointer_queue_sort_uniq (&scanned_objects_list);
		counts->num_unique_scanned_objects = scanned_objects_list.next_slot;

		sgen_pointer_queue_clear (&scanned_objects_list);
	}
#endif
}

static int
compare_pointers (const void *va, const void *vb)
{
	char *a = *(char**)va, *b = *(char**)vb;
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}

/*
 * This is called with sweep completed and the world stopped.
 */
static void
major_free_swept_blocks (size_t section_reserve)
{
	SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");

#if defined(HOST_WIN32) || defined(HOST_ORBIS)
	/*
	 * sgen_free_os_memory () asserts in mono_vfree () because Windows doesn't like freeing the middle of
	 * a VirtualAlloc ()-ed block.
	 */
	return;
#endif

	{
		int i, num_empty_blocks_orig, num_blocks, arr_length;
		void *block;
		void **empty_block_arr;
		void **rebuild_next;

		if (num_empty_blocks <= section_reserve)
			return;
		SGEN_ASSERT (0, num_empty_blocks > 0, "section reserve can't be negative");

		num_empty_blocks_orig = num_empty_blocks;
		empty_block_arr = (void**)sgen_alloc_internal_dynamic (sizeof (void*) * num_empty_blocks_orig,
				INTERNAL_MEM_MS_BLOCK_INFO_SORT, FALSE);
		if (!empty_block_arr)
			goto fallback;

		i = 0;
		for (block = empty_blocks; block; block = *(void**)block)
			empty_block_arr [i++] = block;
		SGEN_ASSERT (0, i == num_empty_blocks, "empty block count wrong");

		sgen_qsort (empty_block_arr, num_empty_blocks, sizeof (void*), compare_pointers);

		/*
		 * We iterate over the free blocks, trying to find MS_BLOCK_ALLOC_NUM
		 * contiguous ones.  If we do, we free them.  If that's not enough to get to
		 * section_reserve, we halve the number of contiguous blocks we're looking
		 * for and have another go, until we're done with looking for pairs of
		 * blocks, at which point we give up and go to the fallback.
		 */
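		/*
		 * E.g. with MS_BLOCK_ALLOC_NUM == 32 the run length sought goes
		 * 32, 16, 8, 4, 2 across iterations of the outer loop; runs of
		 * exactly MS_BLOCK_ALLOC_NUM blocks count as "ideal" in the
		 * statistics since they undo one whole multi-block allocation.
		 */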
		arr_length = num_empty_blocks_orig;
		num_blocks = MS_BLOCK_ALLOC_NUM;
		while (num_empty_blocks > section_reserve && num_blocks > 1) {
			int first = -1;
			int dest = 0;

			for (i = 0; i < arr_length; ++i) {
				int d = dest;
				void *block = empty_block_arr [i];
				SGEN_ASSERT (6, block, "we're not shifting correctly");
				if (i != dest) {
					empty_block_arr [dest] = block;
					/*
					 * This is not strictly necessary, but we're
					 * cautious.
					 */
					empty_block_arr [i] = NULL;
				}
				++dest;

				if (first < 0) {
					first = d;
					continue;
				}

				SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong");

				if ((char*)block != ((char*)empty_block_arr [d-1]) + ms_block_size) {
					first = d;
					continue;
				}

				if (d + 1 - first == num_blocks) {
					/*
					 * We found num_blocks contiguous blocks.  Free them
					 * and null their array entries.  As an optimization
					 * we could, instead of nulling the entries, shift
					 * the following entries over to the left, while
					 * we're iterating.
					 */
					int j;
					sgen_free_os_memory (empty_block_arr [first], ms_block_size * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
					for (j = first; j <= d; ++j)
						empty_block_arr [j] = NULL;
					dest = first;
					first = -1;

					num_empty_blocks -= num_blocks;

					stat_major_blocks_freed += num_blocks;
					if (num_blocks == MS_BLOCK_ALLOC_NUM)
						stat_major_blocks_freed_ideal += num_blocks;
					else
						stat_major_blocks_freed_less_ideal += num_blocks;
				}
			}

			SGEN_ASSERT (6, dest <= i && dest <= arr_length, "array length is off");
			arr_length = dest;
			SGEN_ASSERT (6, arr_length == num_empty_blocks, "array length is off");

			num_blocks >>= 1;
		}

		/* rebuild empty_blocks free list */
		rebuild_next = (void**)&empty_blocks;
		for (i = 0; i < arr_length; ++i) {
			void *block = empty_block_arr [i];
			SGEN_ASSERT (6, block, "we're missing blocks");
			*rebuild_next = block;
			rebuild_next = (void**)block;
		}
		*rebuild_next = NULL;

		/* free array */
		sgen_free_internal_dynamic (empty_block_arr, sizeof (void*) * num_empty_blocks_orig, INTERNAL_MEM_MS_BLOCK_INFO_SORT);
	}

	SGEN_ASSERT (0, num_empty_blocks >= 0, "we freed more blocks than we had in the first place?");

 fallback:
	/*
	 * This is our threshold.  If there's not more empty than used blocks, we won't
	 * release uncontiguous blocks, in fear of fragmenting the address space.
	 */
	if (num_empty_blocks <= num_major_sections)
		return;

	while (num_empty_blocks > section_reserve) {
		void *next = *(void**)empty_blocks;
		sgen_free_os_memory (empty_blocks, ms_block_size, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
		empty_blocks = next;
		/*
		 * Need not be atomic because this is running
		 * single-threaded.
		 */
		--num_empty_blocks;

		++stat_major_blocks_freed;
		++stat_major_blocks_freed_individual;
	}
}

static void
major_pin_objects (SgenGrayQueue *queue)
{
	MSBlockInfo *block;

	FOREACH_BLOCK_NO_LOCK (block) {
		size_t first_entry, last_entry;
		SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning.");
		sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size,
				&first_entry, &last_entry);
		mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
	} END_FOREACH_BLOCK_NO_LOCK;
}

static void
major_init_to_space (void)
{
}

static void
major_report_pinned_memory_usage (void)
{
	g_assert_not_reached ();
}

static gint64
major_get_used_size (void)
{
	gint64 size = 0;
	MSBlockInfo *block;

	/*
	 * We're holding the GC lock, but the sweep thread might be running.  Make sure it's
	 * finished, then we can iterate over the block array.
	 */
	major_finish_sweep_checking ();

	FOREACH_BLOCK_NO_LOCK (block) {
		int count = MS_BLOCK_FREE / block->obj_size;
		void **iter;
		size += count * block->obj_size;
		for (iter = block->free_list; iter; iter = (void**)*iter)
			size -= block->obj_size;
	} END_FOREACH_BLOCK_NO_LOCK;

	return size;
}

/* FIXME: return number of bytes, not of sections */
static size_t
get_num_major_sections (void)
{
	return num_major_sections;
}

/*
 * Returns the number of bytes in blocks that were present when the last sweep was
 * initiated, and were not freed during the sweep.  They are the basis for calculating the
 * allowance.
 */
static size_t
get_bytes_survived_last_sweep (void)
{
	SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep");
	return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * ms_block_size;
}

static gboolean
major_handle_gc_param (const char *opt)
{
	if (g_str_has_prefix (opt, "evacuation-threshold=")) {
		const char *arg = strchr (opt, '=') + 1;
		int percentage = atoi (arg);
		if (percentage < 0 || percentage > 100) {
			fprintf (stderr, "evacuation-threshold must be an integer in the range 0-100.\n");
			exit (1);
		}
		evacuation_threshold = (float)percentage / 100.0f;
		return TRUE;
	} else if (!strcmp (opt, "lazy-sweep")) {
		lazy_sweep = TRUE;
		return TRUE;
	} else if (!strcmp (opt, "no-lazy-sweep")) {
		lazy_sweep = FALSE;
		return TRUE;
	} else if (!strcmp (opt, "concurrent-sweep")) {
		concurrent_sweep = TRUE;
		return TRUE;
	} else if (!strcmp (opt, "no-concurrent-sweep")) {
		concurrent_sweep = FALSE;
		return TRUE;
	}

	return FALSE;
}

static void
major_print_gc_param_usage (void)
{
	fprintf (stderr,
			""
			"  evacuation-threshold=P (where P is a percentage, an integer in 0-100)\n"
			"  (no-)lazy-sweep\n"
			"  (no-)concurrent-sweep\n"
			);
}

/*
 * This callback is used to clear cards, move cards to the shadow table and do counting.
 */
static void
major_iterate_block_ranges (sgen_cardtable_block_callback callback)
{
	MSBlockInfo *block;
	gboolean has_references;

	FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
		if (has_references)
			callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
	} END_FOREACH_BLOCK_NO_LOCK;
}

static void
major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
{
	MSBlockInfo *block;
	gboolean has_references;

	major_finish_sweep_checking ();
	FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
		if (has_references)
			callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
	} END_FOREACH_BLOCK_NO_LOCK;
}

#ifdef HEAVY_STATISTICS
extern guint64 marked_cards;
extern guint64 scanned_cards;
extern guint64 scanned_objects;
extern guint64 remarked_cards;
#endif

#define CARD_WORDS_PER_BLOCK (CARDS_PER_BLOCK / SIZEOF_VOID_P)
/*
 * MS blocks are 16K aligned.
 * Cardtables are 4K aligned, at least.
 * This means that the cardtable of a given block is 32 bytes aligned.
 */
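/*
 * With the default 16K block and 512-byte cards that is 32 cards per
 * block, i.e. four 64-bit words, so scanning word-at-a-time and then
 * using ffs to locate the first non-zero byte skips clean prefixes very
 * cheaply.
 */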
static guint8*
initial_skip_card (guint8 *card_data)
{
	mword *cards = (mword*)card_data;
	mword card = 0;
	int i;
	for (i = 0; i < CARD_WORDS_PER_BLOCK; ++i) {
		card = cards [i];
		if (card)
			break;
	}

	if (i == CARD_WORDS_PER_BLOCK)
		return card_data + CARDS_PER_BLOCK;

#if defined(__i386__) && defined(__GNUC__)
	return card_data + i * 4 + (__builtin_ffs (card) - 1) / 8;
#elif defined(__x86_64__) && defined(__GNUC__)
	return card_data + i * 8 + (__builtin_ffsll (card) - 1) / 8;
#elif defined(__s390x__) && defined(__GNUC__)
	return card_data + i * 8 + (__builtin_ffsll (GUINT64_TO_LE(card)) - 1) / 8;
#else
	for (i = i * SIZEOF_VOID_P; i < CARDS_PER_BLOCK; ++i) {
		if (card_data [i])
			return &card_data [i];
	}
	return card_data;
#endif
}

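/*
 * Fast-path variants of the block accessors for the card scanning loop:
 * they take the object size and block start as plain values so the
 * compiler can hoist them.  MS_OBJ_ALLOCED_FAST relies on the same
 * invariant as MS_OBJ_ALLOCED: an allocated object's first word is its
 * vtable pointer, which always lies outside the block, while a free
 * slot's first word is NULL or a free-list link pointing into the block.
 */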
#define MS_BLOCK_OBJ_INDEX_FAST(o,b,os)	(((char*)(o) - ((b) + MS_BLOCK_SKIP)) / (os))
#define MS_BLOCK_OBJ_FAST(b,os,i)	((b) + MS_BLOCK_SKIP + (os) * (i))
#define MS_OBJ_ALLOCED_FAST(o,b)	(*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + ms_block_size))

static void
scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, ScanCopyContext ctx)
{
	SgenGrayQueue *queue = ctx.queue;
	ScanObjectFunc scan_func = ctx.ops->scan_object;
	/*
	 * FIXME: On systems with very large pages, we allocate fairly large
	 * arrays on the stack here. This shouldn't be a problem once block
	 * size is no longer required to be a multiple of the system page size.
	 */
#ifndef SGEN_HAVE_OVERLAPPING_CARDS
	guint8 *cards_copy = alloca (sizeof (guint8) * CARDS_PER_BLOCK);
#endif
	guint8 *cards_preclean = alloca (sizeof (guint8) * CARDS_PER_BLOCK);
	gboolean small_objects;
	int block_obj_size;
	char *block_start;
	guint8 *card_data, *card_base;
	guint8 *card_data_end;
	char *scan_front = NULL;

	/* The concurrent mark doesn't enter evacuating blocks */
	if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN && major_block_is_evacuating (block))
		return;

	block_obj_size = block->obj_size;
	small_objects = block_obj_size < CARD_SIZE_IN_BYTES;

	block_start = MS_BLOCK_FOR_BLOCK_INFO (block);

	/*
	 * This is safe in the face of card aliasing for the following reason:
	 *
	 * Major blocks are 16k aligned, or 32 cards aligned.
	 * Card aliasing happens in powers of two, so as long as major blocks are aligned to their
	 * sizes, they won't overflow the cardtable overlap modulus.
	 */
	if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
		card_data = card_base = block->cardtable_mod_union;
		/*
		 * This happens when the nursery collection that precedes finishing
		 * the concurrent collection allocates new major blocks.
		 */
		if (!card_data)
			return;

		if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN) {
			sgen_card_table_preclean_mod_union (card_data, cards_preclean, CARDS_PER_BLOCK);
			card_data = card_base = cards_preclean;
		}
	} else {
#ifdef SGEN_HAVE_OVERLAPPING_CARDS
		card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
#else
		if (!sgen_card_table_get_card_data (cards_copy, (mword)block_start, CARDS_PER_BLOCK))
			return;
		card_data = card_base = cards_copy;
#endif
	}
	card_data_end = card_data + CARDS_PER_BLOCK;

	card_data += MS_BLOCK_SKIP >> CARD_BITS;

	card_data = initial_skip_card (card_data);
	while (card_data < card_data_end) {
		size_t card_index, first_object_index;
		char *start;
		char *end;
		char *first_obj, *obj;

		HEAVY_STAT (++scanned_cards);

		if (!*card_data) {
			++card_data;
			continue;
		}

		card_index = card_data - card_base;
		start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
		end = start + CARD_SIZE_IN_BYTES;

		if (!block_is_swept_or_marking (block))
			sweep_block (block);

		HEAVY_STAT (++marked_cards);

		if (small_objects)
			sgen_card_table_prepare_card_for_scanning (card_data);

		/*
		 * If the card we're looking at starts at or in the block header, we
		 * must start at the first object in the block, without calculating
		 * the index of the object we're hypothetically starting at, because
		 * it would be negative.
		 */
		if (card_index <= (MS_BLOCK_SKIP >> CARD_BITS))
			first_object_index = 0;
		else
			first_object_index = MS_BLOCK_OBJ_INDEX_FAST (start, block_start, block_obj_size);

		obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, first_object_index);

		binary_protocol_card_scan (first_obj, end - first_obj);

		while (obj < end) {
			if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
				goto next_object;

			if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
				/* FIXME: do this more efficiently */
				int w, b;
				MS_CALC_MARK_BIT (w, b, obj);
				if (!MS_MARK_BIT (block, w, b))
					goto next_object;
			}

			GCObject *object = (GCObject*)obj;

			if (small_objects) {
				HEAVY_STAT (++scanned_objects);
				scan_func (object, sgen_obj_get_descriptor (object), queue);
			} else {
				size_t offset = sgen_card_table_get_card_offset (obj, block_start);
				sgen_cardtable_scan_object (object, block_obj_size, card_base + offset, ctx);
			}
		next_object:
			obj += block_obj_size;
			g_assert (scan_front <= obj);
			scan_front = obj;
		}

		HEAVY_STAT (if (*card_data) ++remarked_cards);

		if (small_objects)
			++card_data;
		else
			card_data = card_base + sgen_card_table_get_card_offset (obj, block_start);
	}
}

static void
major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count, int block_count)
{
	MSBlockInfo *block;
	gboolean has_references, was_sweeping, skip_scan;
	int first_block, last_block, index;

	/*
	 * The last_block's index is at least (num_major_sections - 1) since we
	 * can have nulls in the allocated_blocks list. The last worker will
	 * scan the left-overs of the list. We expect few null entries in the
	 * allocated_blocks list, therefore using num_major_sections for computing
	 * block_count shouldn't affect work distribution.
	 */
	first_block = block_count * job_index;
	if (job_index == job_split_count - 1)
		last_block = allocated_blocks.next_slot;
	else
		last_block = block_count * (job_index + 1);

	if (!concurrent_mark)
		g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);

	if (scan_type != CARDTABLE_SCAN_GLOBAL)
		SGEN_ASSERT (0, !sweep_in_progress (), "Sweep should be finished when we scan mod union card table");
	was_sweeping = sweep_in_progress ();

	binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
	FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK (block, first_block, last_block, index, has_references) {
#ifdef PREFETCH_CARDS
		int prefetch_index = index + 6;
		if (prefetch_index < allocated_blocks.next_slot) {
			MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
			PREFETCH_READ (prefetch_block);
			if (scan_type == CARDTABLE_SCAN_GLOBAL) {
				guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
				PREFETCH_WRITE (prefetch_cards);
				PREFETCH_WRITE (prefetch_cards + 32);
			}
		}
#endif
		if (!has_references)
			continue;
		skip_scan = FALSE;

		if (scan_type == CARDTABLE_SCAN_GLOBAL) {
			gpointer *card_start = (gpointer*) sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (block));
			gboolean has_dirty_cards = FALSE;
			int i;
			for (i = 0; i < CARDS_PER_BLOCK / sizeof (gpointer); i++) {
				if (card_start [i]) {
					has_dirty_cards = TRUE;
					break;
				}
			}
			if (!has_dirty_cards) {
				skip_scan = TRUE;
			} else {
				/*
				 * After the start of a concurrent collection, blocks change state
				 * to MARKING and must not be swept here. We can't race with sweep
				 * start since we are in a nursery collection. We also avoid the
				 * CAS on the block slot when no sweep is in progress.
				 */
2693                                 if (sweep_in_progress ()) {
2694                                         skip_scan = !ensure_block_is_checked_for_sweeping (index, TRUE, NULL);
2695                                 } else if (was_sweeping) {
2696                                         /* Recheck in case sweep finished after dereferencing the slot */
2697                                         skip_scan = *sgen_array_list_get_slot (&allocated_blocks, index) == 0;
2698                                 }
2699                         }
2700                 }
2701                 if (!skip_scan)
2702                         scan_card_table_for_block (block, scan_type, ctx);
2703         } END_FOREACH_BLOCK_RANGE_NO_LOCK;
2704         binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
2705 }
2706
2707 static void
2708 major_count_cards (long long *num_total_cards, long long *num_marked_cards)
2709 {
2710         MSBlockInfo *block;
2711         gboolean has_references;
2712         long long total_cards = 0;
2713         long long marked_cards = 0;
2714
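        /* Card counts are not stable while sweeping; report them as unknown. */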
2715         if (sweep_in_progress ()) {
2716                 *num_total_cards = -1;
2717                 *num_marked_cards = -1;
2718                 return;
2719         }
2720
2721         FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
2722                 guint8 *cards = sgen_card_table_get_card_scan_address ((mword) MS_BLOCK_FOR_BLOCK_INFO (block));
2723                 int i;
2724
2725                 if (!has_references)
2726                         continue;
2727
2728                 total_cards += CARDS_PER_BLOCK;
2729                 for (i = 0; i < CARDS_PER_BLOCK; ++i) {
2730                         if (cards [i])
2731                                 ++marked_cards;
2732                 }
2733         } END_FOREACH_BLOCK_NO_LOCK;
2734
2735         *num_total_cards = total_cards;
2736         *num_marked_cards = marked_cards;
2737 }
2738
2739 static void
2740 update_cardtable_mod_union (void)
2741 {
2742         MSBlockInfo *block;
2743
2744         FOREACH_BLOCK_NO_LOCK (block) {
2745                 gpointer *card_start = (gpointer*) sgen_card_table_get_card_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (block));
2746                 gboolean has_dirty_cards = FALSE;
2747                 int i;
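                /* Word-at-a-time dirty-card check, as in major_scan_card_table. */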
2748                 for (i = 0; i < CARDS_PER_BLOCK / sizeof(gpointer); i++) {
2749                         if (card_start [i]) {
2750                                 has_dirty_cards = TRUE;
2751                                 break;
2752                         }
2753                 }
2754                 if (has_dirty_cards) {
2755                         size_t num_cards;
2756                         guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE);
2757                         sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size, &num_cards);
2758                         SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
2759                 }
2760         } END_FOREACH_BLOCK_NO_LOCK;
2761 }
2762
2763 #undef pthread_create
2764
2765 static void
2766 post_param_init (SgenMajorCollector *collector)
2767 {
2768         collector->sweeps_lazily = lazy_sweep;
2769 }
2770
2771 /*
2772  * We are guaranteed to be called on the worker thread in question.
2773  * This initializes the per-thread free lists for workers that do
2774  * parallel object allocation. The lists are also kept in a TLS key
2775  * and in *list_p so they can be found and traversed at major
2776  * collection / sweep start.
2777  */
2778 static void
2779 sgen_init_block_free_lists (gpointer *list_p)
2780 {
2781         int i;
2782         MSBlockInfo ***worker_free_blocks = (MSBlockInfo ***) mono_native_tls_get_value (worker_block_free_list_key);
2783
2784         /*
2785          * For simplicity, a worker thread uses the same free-block lists
2786          * regardless of the context (major/minor) it belongs to.
2787          */
2788         if (worker_free_blocks) {
2789                 *list_p = (gpointer)worker_free_blocks;
2790                 return;
2791         }
2792
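        /*
         * First use by this worker: allocate one free-list head per
         * (block type, size class) pair and remember it in TLS.
         */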
2793         worker_free_blocks = (MSBlockInfo ***) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo**) * MS_BLOCK_TYPE_MAX, INTERNAL_MEM_MS_TABLES, TRUE);
2794
2795         for (i = 0; i < MS_BLOCK_TYPE_MAX; i++)
2796                 worker_free_blocks [i] = (MSBlockInfo **) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2797
2798         *list_p = (gpointer)worker_free_blocks;
2799
2800         mono_native_tls_set_value (worker_block_free_list_key, worker_free_blocks);
2801 }
2802
2803 static void
2804 sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurrent, gboolean is_parallel)
2805 {
2806         int i;
2807
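        /* Size blocks to the system page size, but never below MS_BLOCK_SIZE_MIN. */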
2808         ms_block_size = mono_pagesize ();
2809
2810         if (ms_block_size < MS_BLOCK_SIZE_MIN)
2811                 ms_block_size = MS_BLOCK_SIZE_MIN;
2812
2813         sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, SIZEOF_MS_BLOCK_INFO);
2814
2815         if (mono_cpu_count () <= 1)
2816                 is_parallel = FALSE;
2817
2818         num_block_obj_sizes = ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, NULL);
2819         block_obj_sizes = (int *)sgen_alloc_internal_dynamic (sizeof (int) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2820         ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, block_obj_sizes);
2821
2822         evacuate_block_obj_sizes = (gboolean *)sgen_alloc_internal_dynamic (sizeof (gboolean) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2823         for (i = 0; i < num_block_obj_sizes; ++i)
2824                 evacuate_block_obj_sizes [i] = FALSE;
2825
2826         sweep_slots_available = (size_t *)sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2827         sweep_slots_used = (size_t *)sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2828         sweep_num_blocks = (size_t *)sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2829
2830         /*
2831         {
2832                 int i;
2833                 g_print ("block object sizes:\n");
2834                 for (i = 0; i < num_block_obj_sizes; ++i)
2835                         g_print ("%d\n", block_obj_sizes [i]);
2836         }
2837         */
2838
2839         for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i)
2840                 free_block_lists [i] = (MSBlockInfo *volatile *)sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2841
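        /*
         * Precompute size-class indexes for all small sizes (8-byte steps) and
         * assert that the fast lookup macro agrees with the slow search.
         */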
2842         for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES; ++i)
2843                 fast_block_obj_size_indexes [i] = ms_find_block_obj_size_index (i * 8);
2844         for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i)
2845                 g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i));
2846
2847         /* We can do this because we always init the minor before the major */
2848         if (is_parallel || sgen_get_minor_collector ()->is_parallel)
2849                 mono_native_tls_alloc (&worker_block_free_list_key, NULL);
2850
2851         mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced);
2852         mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed);
2853         mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept);
2854         mono_counters_register ("# major blocks freed ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_ideal);
2855         mono_counters_register ("# major blocks freed less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_less_ideal);
2856         mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual);
2857         mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal);
2858
2859         collector->section_size = ms_block_size;
2860
2861         concurrent_mark = is_concurrent;
2862         collector->is_concurrent = is_concurrent;
2863         collector->is_parallel = is_parallel;
2864         collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
2865         collector->supports_cardtable = TRUE;
2866
2867         collector->alloc_heap = major_alloc_heap;
2868         collector->is_object_live = major_is_object_live;
2869         collector->alloc_small_pinned_obj = major_alloc_small_pinned_obj;
2870         collector->alloc_degraded = major_alloc_degraded;
2871
2872         collector->alloc_object = major_alloc_object;
2873         collector->alloc_object_par = major_alloc_object_par;
2874         collector->free_pinned_object = free_pinned_object;
2875         collector->iterate_objects = major_iterate_objects;
2876         collector->free_non_pinned_object = major_free_non_pinned_object;
2877         collector->pin_objects = major_pin_objects;
2878         collector->pin_major_object = pin_major_object;
2879         collector->scan_card_table = major_scan_card_table;
2880         collector->iterate_live_block_ranges = major_iterate_live_block_ranges;
2881         collector->iterate_block_ranges = major_iterate_block_ranges;
2882         if (is_concurrent) {
2883                 collector->update_cardtable_mod_union = update_cardtable_mod_union;
2884                 collector->get_cardtable_mod_union_for_reference = major_get_cardtable_mod_union_for_reference;
2885         }
2886         collector->init_to_space = major_init_to_space;
2887         collector->sweep = major_sweep;
2888         collector->have_swept = major_have_swept;
2889         collector->finish_sweeping = major_finish_sweep_checking;
2890         collector->free_swept_blocks = major_free_swept_blocks;
2891         collector->check_scan_starts = major_check_scan_starts;
2892         collector->dump_heap = major_dump_heap;
2893         collector->get_used_size = major_get_used_size;
2894         collector->start_nursery_collection = major_start_nursery_collection;
2895         collector->finish_nursery_collection = major_finish_nursery_collection;
2896         collector->start_major_collection = major_start_major_collection;
2897         collector->finish_major_collection = major_finish_major_collection;
2898         collector->ptr_is_in_non_pinned_space = major_ptr_is_in_non_pinned_space;
2899         collector->ptr_is_from_pinned_alloc = ptr_is_from_pinned_alloc;
2900         collector->report_pinned_memory_usage = major_report_pinned_memory_usage;
2901         collector->get_num_major_sections = get_num_major_sections;
2902         collector->get_bytes_survived_last_sweep = get_bytes_survived_last_sweep;
2903         collector->handle_gc_param = major_handle_gc_param;
2904         collector->print_gc_param_usage = major_print_gc_param_usage;
2905         collector->post_param_init = post_param_init;
2906         collector->is_valid_object = major_is_valid_object;
2907         collector->describe_pointer = major_describe_pointer;
2908         collector->count_cards = major_count_cards;
2909         collector->init_block_free_lists = sgen_init_block_free_lists;
2910
2911         collector->major_ops_serial.copy_or_mark_object = major_copy_or_mark_object_canonical;
2912         collector->major_ops_serial.scan_object = major_scan_object_with_evacuation;
2913         collector->major_ops_serial.scan_ptr_field = major_scan_ptr_field_with_evacuation;
2914         collector->major_ops_serial.drain_gray_stack = drain_gray_stack;
2915         if (is_concurrent) {
2916                 collector->major_ops_concurrent_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical;
2917                 collector->major_ops_concurrent_start.scan_object = major_scan_object_concurrent_with_evacuation;
2918                 collector->major_ops_concurrent_start.scan_vtype = major_scan_vtype_concurrent_with_evacuation;
2919                 collector->major_ops_concurrent_start.scan_ptr_field = major_scan_ptr_field_concurrent_with_evacuation;
2920                 collector->major_ops_concurrent_start.drain_gray_stack = drain_gray_stack_concurrent;
2921
2922                 collector->major_ops_concurrent_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_finish_canonical;
2923                 collector->major_ops_concurrent_finish.scan_object = major_scan_object_with_evacuation;
2924                 collector->major_ops_concurrent_finish.scan_vtype = major_scan_vtype_with_evacuation;
2925                 collector->major_ops_concurrent_finish.scan_ptr_field = major_scan_ptr_field_with_evacuation;
2926                 collector->major_ops_concurrent_finish.drain_gray_stack = drain_gray_stack;
2927
2928                 if (is_parallel) {
2929                         collector->major_ops_conc_par_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_par_canonical;
2930                         collector->major_ops_conc_par_start.scan_object = major_scan_object_concurrent_par_with_evacuation;
2931                         collector->major_ops_conc_par_start.scan_vtype = major_scan_vtype_concurrent_par_with_evacuation;
2932                         collector->major_ops_conc_par_start.scan_ptr_field = major_scan_ptr_field_concurrent_par_with_evacuation;
2933                         collector->major_ops_conc_par_start.drain_gray_stack = drain_gray_stack_concurrent_par;
2934
2935                         collector->major_ops_conc_par_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_par_finish_canonical;
2936                         collector->major_ops_conc_par_finish.scan_object = major_scan_object_par_with_evacuation;
2937                         collector->major_ops_conc_par_finish.scan_vtype = major_scan_vtype_par_with_evacuation;
2938                         collector->major_ops_conc_par_finish.scan_ptr_field = major_scan_ptr_field_par_with_evacuation;
2939                         collector->major_ops_conc_par_finish.drain_gray_stack = drain_gray_stack_par;
2940                 }
2941         }
2942
2943 #ifdef HEAVY_STATISTICS
2944         mono_counters_register ("Optimized copy", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy);
2945         mono_counters_register ("Optimized copy nursery", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery);
2946         mono_counters_register ("Optimized copy nursery forwarded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_forwarded);
2947         mono_counters_register ("Optimized copy nursery pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_pinned);
2948         mono_counters_register ("Optimized copy major", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major);
2949         mono_counters_register ("Optimized copy major small fast", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_fast);
2950         mono_counters_register ("Optimized copy major small slow", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_slow);
2951         mono_counters_register ("Optimized copy major small evacuate", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_evacuate);
2952         mono_counters_register ("Optimized copy major large", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_large);
2953         mono_counters_register ("Optimized major scan", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan);
2954         mono_counters_register ("Optimized major scan no refs", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan_no_refs);
2955
2956         mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_loops);
2957         mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fills);
2958         mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fill_failures);
2959 #endif
2960
2961 #ifdef SGEN_HEAVY_BINARY_PROTOCOL
2962         mono_os_mutex_init (&scanned_objects_list_lock);
2963 #endif
2964
2965         SGEN_ASSERT (0, SGEN_MAX_SMALL_OBJ_SIZE <= MS_BLOCK_FREE / 2, "MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2");
2966
2967         /* The card table requires major blocks to be aligned to 8 cards. */
2968         g_assert ((ms_block_size % (8 * CARD_SIZE_IN_BYTES)) == 0);
2969
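        /* Concurrent marking uses a single worker; the parallel variant uses one per core. */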
2970         if (is_concurrent && is_parallel)
2971                 sgen_workers_create_context (GENERATION_OLD, mono_cpu_count ());
2972         else if (is_concurrent)
2973                 sgen_workers_create_context (GENERATION_OLD, 1);
2974
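        /* Concurrent sweep runs on its own single-threaded thread-pool context. */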
2975         if (concurrent_sweep)
2976                 sweep_pool_context = sgen_thread_pool_create_context (1, NULL, NULL, NULL, NULL, NULL);
2977 }
2978
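/*
 * Entry points for the three mark&sweep flavors: serial, concurrent and
 * concurrent+parallel. The embedder selects one when configuring the major
 * collector (e.g. MONO_GC_PARAMS=major=marksweep-conc).
 */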
2979 void
2980 sgen_marksweep_init (SgenMajorCollector *collector)
2981 {
2982         sgen_marksweep_init_internal (collector, FALSE, FALSE);
2983 }
2984
2985 void
2986 sgen_marksweep_conc_init (SgenMajorCollector *collector)
2987 {
2988         sgen_marksweep_init_internal (collector, TRUE, FALSE);
2989 }
2990
2991 void
2992 sgen_marksweep_conc_par_init (SgenMajorCollector *collector)
2993 {
2994         sgen_marksweep_init_internal (collector, TRUE, TRUE);
2995 }
2996
2997 #endif