[sgen] Use array with fixed-size element for mark words
[mono.git] mono/sgen/sgen-marksweep.c
/*
 * sgen-marksweep.c: The Mark & Sweep major collector.
 *
 * Author:
 *      Mark Probst <mark.probst@gmail.com>
 *
 * Copyright 2009-2010 Novell, Inc.
 * Copyright (C) 2012 Xamarin Inc
 *
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */

#include "config.h"

#ifdef HAVE_SGEN_GC

#include <math.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>

#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-protocol.h"
#include "mono/sgen/sgen-cardtable.h"
#include "mono/sgen/sgen-memory-governor.h"
#include "mono/sgen/sgen-layout-stats.h"
#include "mono/sgen/sgen-pointer-queue.h"
#include "mono/sgen/sgen-array-list.h"
#include "mono/sgen/sgen-pinning.h"
#include "mono/sgen/sgen-workers.h"
#include "mono/sgen/sgen-thread-pool.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"

#if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
#define MS_BLOCK_SIZE   ARCH_MIN_MS_BLOCK_SIZE
#define MS_BLOCK_SIZE_SHIFT     ARCH_MIN_MS_BLOCK_SIZE_SHIFT
#else
#define MS_BLOCK_SIZE_SHIFT     14      /* INT FASTENABLE */
#define MS_BLOCK_SIZE           (1 << MS_BLOCK_SIZE_SHIFT)
#endif
#define MAJOR_SECTION_SIZE      MS_BLOCK_SIZE
#define CARDS_PER_BLOCK (MS_BLOCK_SIZE / CARD_SIZE_IN_BYTES)

/*
 * Don't allocate single blocks, but alloc a contingent of this many
 * blocks in one swoop.  This must be a power of two.
 */
#define MS_BLOCK_ALLOC_NUM      32

/*
 * Number of bytes before the first object in a block.  At the start
 * of a block is the MSBlockHeader, then optional padding, then come
 * the objects, so this must be >= sizeof (MSBlockHeader).
 */
#define MS_BLOCK_SKIP   ((sizeof (MSBlockHeader) + 15) & ~15)

#define MS_BLOCK_FREE   (MS_BLOCK_SIZE - MS_BLOCK_SKIP)

#define MS_NUM_MARK_WORDS       ((MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8))
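
/*
 * A worked example (a sketch, assuming the default MS_BLOCK_SIZE of 16384
 * and an SGEN_ALLOC_ALIGN of 8 bytes): a block has 16384 / 8 = 2048
 * mark-bit positions, which round up to (2048 + 31) / 32 = 64 guint32
 * mark words per block.
 */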

/*
 * Blocks progress from one state to the next:
 *
 * SWEPT           The block is fully swept.  It might or might not be in
 *                 a free list.
 *
 * MARKING         The block might or might not contain live objects.  If
 *                 we're in between an initial collection pause and the
 *                 finishing pause, the block might or might not be in a
 *                 free list.
 *
 * CHECKING        The sweep thread is investigating the block to determine
 *                 whether or not it contains live objects.  The block is
 *                 not in a free list.
 *
 * NEED_SWEEPING   The block contains live objects but has not yet been
 *                 swept.  It also contains free slots.  It is in a block
 *                 free list.
 *
 * SWEEPING        The block is being swept.  It might be in a free list.
 */

enum {
        BLOCK_STATE_SWEPT,
        BLOCK_STATE_MARKING,
        BLOCK_STATE_CHECKING,
        BLOCK_STATE_NEED_SWEEPING,
        BLOCK_STATE_SWEEPING
};
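
/*
 * A sketch of the transitions described above (illustrative, not
 * exhaustive):
 *
 *     SWEPT --(collection starts)--> MARKING --(sweep starts)--> CHECKING
 *     CHECKING --(live objects + free slots)--> NEED_SWEEPING --> SWEEPING --> SWEPT
 *     CHECKING --(no live objects)--> block is freed
 */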

typedef struct _MSBlockInfo MSBlockInfo;
struct _MSBlockInfo {
        guint16 obj_size;
        /*
         * FIXME: Do we even need this? It's only used during sweep and might be worth
         * recalculating to save the space.
         */
        guint16 obj_size_index;
        /* FIXME: Reduce this - it only needs a byte. */
        volatile gint32 state;
        gint16 nused;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
        unsigned int has_pinned : 1;    /* means cannot evacuate */
        unsigned int is_to_space : 1;
        void ** volatile free_list;
        MSBlockInfo * volatile next_free;
        guint8 * volatile cardtable_mod_union;
        guint32 mark_words [MS_NUM_MARK_WORDS];
};

#define MS_BLOCK_FOR_BLOCK_INFO(b)      ((char*)(b))

#define MS_BLOCK_OBJ(b,i)               ((GCObject *)(MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i)))
#define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size)             (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i))
#define MS_BLOCK_DATA_FOR_OBJ(o)        ((char*)((mword)(o) & ~(mword)(MS_BLOCK_SIZE - 1)))

typedef struct {
        MSBlockInfo info;
} MSBlockHeader;

#define MS_BLOCK_FOR_OBJ(o)             (&((MSBlockHeader*)MS_BLOCK_DATA_FOR_OBJ ((o)))->info)

/* object index will always be small */
#define MS_BLOCK_OBJ_INDEX(o,b) ((int)(((char*)(o) - (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP)) / (b)->obj_size))
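
/*
 * An illustrative round trip (a sketch with made-up addresses; blocks are
 * aligned to MS_BLOCK_SIZE, which ms_get_empty_block () guarantees).
 * With 16k blocks, an assumed MS_BLOCK_SKIP of 48 and an obj_size of 16,
 * an object at 0x7f0000204050 lives in the block at
 *
 *     MS_BLOCK_DATA_FOR_OBJ (obj) = 0x7f0000204050 & ~0x3fff = 0x7f0000204000
 *
 * at index (0x50 - 48) / 16 = 2, and MS_BLOCK_OBJ (block, 2) maps back to
 * 0x7f0000204000 + 48 + 16 * 2 = 0x7f0000204050.
 */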

/* casting to int is fine since blocks are 16k by default and always far smaller than INT_MAX */
#define MS_CALC_MARK_BIT(w,b,o)         do {                            \
                int i = ((int)((char*)(o) - MS_BLOCK_DATA_FOR_OBJ ((o)))) >> SGEN_ALLOC_ALIGN_BITS; \
                (w) = i >> 5;                                           \
                (b) = i & 31;                                           \
        } while (0)

#define MS_MARK_BIT(bl,w,b)     ((bl)->mark_words [(w)] & (ONE_P << (b)))
#define MS_SET_MARK_BIT(bl,w,b) ((bl)->mark_words [(w)] |= (ONE_P << (b)))
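
/*
 * For example (a sketch, assuming SGEN_ALLOC_ALIGN_BITS is 3, i.e. 8-byte
 * alignment): an object 344 bytes into its block has slot index
 * i = 344 >> 3 = 43, so its mark bit is bit 43 & 31 = 11 of mark word
 * 43 >> 5 = 1:
 *
 *     int word, bit;
 *     MS_CALC_MARK_BIT (word, bit, obj);
 *     if (!MS_MARK_BIT (block, word, bit))
 *             MS_SET_MARK_BIT (block, word, bit);
 */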

#define MS_OBJ_ALLOCED(o,b)     (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE))
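
/*
 * Why this works (a note, not in the original): the first word of an
 * allocated object is its vtable pointer, which never points into the
 * object's own block, while the first word of a free slot is either NULL
 * or a pointer to the next free slot in the same block (see how
 * ms_alloc_block () and sweep_block () thread the free list).
 */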

#define MS_BLOCK_OBJ_SIZE_FACTOR        (pow (2.0, 1.0 / 3))
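
/*
 * With this factor, consecutive block object sizes grow by 2^(1/3) ~= 1.26
 * per class (rounded to the allocation alignment), so rounding an object
 * up to its size class wastes at most roughly 26% of the slot.  E.g. an
 * object of 100 bytes lands in a class no larger than about 126 bytes
 * (illustrative; the actual size table is computed at collector init).
 */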

/*
 * This way we can look up block object size indexes for sizes up to
 * 256 bytes with a single load.
 */
#define MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES      32

static int *block_obj_sizes;
static int num_block_obj_sizes;
static int fast_block_obj_size_indexes [MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES];

#define MS_BLOCK_FLAG_PINNED    1
#define MS_BLOCK_FLAG_REFS      2

#define MS_BLOCK_TYPE_MAX       4

static gboolean *evacuate_block_obj_sizes;
static float evacuation_threshold = 0.666f;
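
/*
 * A sketch of the policy (the decision itself is made at sweep time): a
 * block size class becomes a candidate for evacuation when the fraction
 * of live slots observed by sweep drops below this threshold.  E.g. if a
 * class has 1000 slots across its blocks and only 500 are used,
 * 0.5 < 0.666 and objects of that size will be evacuated in the next
 * major collection (see evacuate_block_obj_sizes above and
 * major_block_is_evacuating () below).
 */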

static gboolean lazy_sweep = TRUE;

enum {
        SWEEP_STATE_SWEPT,
        SWEEP_STATE_NEED_SWEEPING,
        SWEEP_STATE_SWEEPING,
        SWEEP_STATE_SWEEPING_AND_ITERATING,
        SWEEP_STATE_COMPACTING
};

static volatile int sweep_state = SWEEP_STATE_SWEPT;

static gboolean concurrent_mark;
static gboolean concurrent_sweep = TRUE;

#define BLOCK_IS_TAGGED_HAS_REFERENCES(bl)      SGEN_POINTER_IS_TAGGED_1 ((bl))
#define BLOCK_TAG_HAS_REFERENCES(bl)            SGEN_POINTER_TAG_1 ((bl))

#define BLOCK_IS_TAGGED_CHECKING(bl)            SGEN_POINTER_IS_TAGGED_2 ((bl))
#define BLOCK_TAG_CHECKING(bl)                  SGEN_POINTER_TAG_2 ((bl))

#define BLOCK_UNTAG(bl)                         ((MSBlockInfo *)SGEN_POINTER_UNTAG_12 ((bl)))

#define BLOCK_TAG(bl)                           ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))

/* all allocated blocks in the system */
static SgenArrayList allocated_blocks = SGEN_ARRAY_LIST_INIT (NULL, sgen_array_list_default_is_slot_set, sgen_array_list_default_cas_setter, INTERNAL_MEM_PIN_QUEUE);

/* non-allocated block free-list */
static void *empty_blocks = NULL;
static size_t num_empty_blocks = 0;

/*
 * We can iterate the block list even while sweep is in progress, but we
 * need to account for blocks that will be checked for sweeping and even
 * freed in the process.
 */
#define FOREACH_BLOCK_NO_LOCK(bl) {                                     \
        volatile gpointer *slot;                                                \
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {        \
                (bl) = BLOCK_UNTAG (*slot);                             \
                if (!(bl))                                              \
                        continue;
#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr) {                   \
        volatile gpointer *slot;                                                \
        SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {        \
                (bl) = (MSBlockInfo *) (*slot);                 \
                if (!(bl))                                              \
                        continue;                                       \
                (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
                (bl) = BLOCK_UNTAG ((bl));
#define END_FOREACH_BLOCK_NO_LOCK       } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
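
/*
 * Typical use of the iteration macros (a usage sketch; see e.g.
 * consistency_check () below):
 *
 *     MSBlockInfo *block;
 *     FOREACH_BLOCK_NO_LOCK (block) {
 *             ... inspect block ...
 *     } END_FOREACH_BLOCK_NO_LOCK;
 */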

static volatile size_t num_major_sections = 0;
/*
 * One free block list for each block object size.  We add and remove blocks from these
 * lists lock-free via CAS.
 *
 * Blocks accessed/removed from `free_block_lists`:
 *   from the mutator (with GC lock held)
 *   in nursery collections
 *   in non-concurrent major collections
 *   in the finishing pause of concurrent major collections (whole list is cleared)
 *
 * Blocks added to `free_block_lists`:
 *   in the sweeping thread
 *   during nursery collections
 *   from domain clearing (with the world stopped and no sweeping happening)
 *
 * Of those, only the sweep thread runs without the GC lock.  The sweep
 * thread only ever adds blocks to the free list, so the ABA problem can't occur.
 */
static MSBlockInfo * volatile *free_block_lists [MS_BLOCK_TYPE_MAX];

static guint64 stat_major_blocks_alloced = 0;
static guint64 stat_major_blocks_freed = 0;
static guint64 stat_major_blocks_lazy_swept = 0;

static guint64 stat_major_blocks_freed_ideal = 0;
static guint64 stat_major_blocks_freed_less_ideal = 0;
static guint64 stat_major_blocks_freed_individual = 0;
static guint64 stat_major_blocks_alloced_less_ideal = 0;

#ifdef SGEN_COUNT_NUMBER_OF_MAJOR_OBJECTS_MARKED
static guint64 num_major_objects_marked = 0;
#define INC_NUM_MAJOR_OBJECTS_MARKED()  (++num_major_objects_marked)
#else
#define INC_NUM_MAJOR_OBJECTS_MARKED()
#endif

#ifdef SGEN_HEAVY_BINARY_PROTOCOL
static mono_mutex_t scanned_objects_list_lock;
static SgenPointerQueue scanned_objects_list;

static void
add_scanned_object (void *ptr)
{
        if (!binary_protocol_is_enabled ())
                return;

        mono_os_mutex_lock (&scanned_objects_list_lock);
        sgen_pointer_queue_add (&scanned_objects_list, ptr);
        mono_os_mutex_unlock (&scanned_objects_list_lock);
}
#endif

static gboolean sweep_block (MSBlockInfo *block);

static int
ms_find_block_obj_size_index (size_t size)
{
        int i;
        SGEN_ASSERT (9, size <= SGEN_MAX_SMALL_OBJ_SIZE, "size %zd is bigger than max small object size %d", size, SGEN_MAX_SMALL_OBJ_SIZE);
        for (i = 0; i < num_block_obj_sizes; ++i)
                if (block_obj_sizes [i] >= size)
                        return i;
        g_error ("no object of size %zd\n", size);
        return -1;
}

#define FREE_BLOCKS_FROM(lists,p,r)     (lists [((p) ? MS_BLOCK_FLAG_PINNED : 0) | ((r) ? MS_BLOCK_FLAG_REFS : 0)])
#define FREE_BLOCKS(p,r)                (FREE_BLOCKS_FROM (free_block_lists, (p), (r)))

#define MS_BLOCK_OBJ_SIZE_INDEX(s)                              \
        (((s)+7)>>3 < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES ?      \
         fast_block_obj_size_indexes [((s)+7)>>3] :             \
         ms_find_block_obj_size_index ((s)))
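
/*
 * For example (a sketch, assuming the defaults above): a request of 40
 * bytes computes (40 + 7) >> 3 = 5 < 32, so it is served with a single
 * load from fast_block_obj_size_indexes [5]; a request of 300 bytes
 * computes (300 + 7) >> 3 = 38 >= 32 and falls back to the linear search
 * in ms_find_block_obj_size_index ().
 */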

static void*
major_alloc_heap (mword nursery_size, mword nursery_align, int the_nursery_bits)
{
        char *start;
        if (nursery_align)
                start = (char *)sgen_alloc_os_memory_aligned (nursery_size, nursery_align, (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE), "nursery", MONO_MEM_ACCOUNT_SGEN_NURSERY);
        else
                start = (char *)sgen_alloc_os_memory (nursery_size, (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE), "nursery", MONO_MEM_ACCOUNT_SGEN_NURSERY);

        return start;
}

static void
update_heap_boundaries_for_block (MSBlockInfo *block)
{
        sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE);
}

/*
 * Thread safe
 */
static void*
ms_get_empty_block (void)
{
        char *p;
        int i;
        void *block, *empty, *next;

 retry:
        if (!empty_blocks) {
                /*
                 * We try allocating MS_BLOCK_ALLOC_NUM blocks first.  If that's
                 * unsuccessful, we halve the number of blocks and try again, until we're at
                 * 1.  If that doesn't work either, we assert.
                 */
                int alloc_num = MS_BLOCK_ALLOC_NUM;
                for (;;) {
                        p = (char *)sgen_alloc_os_memory_aligned (MS_BLOCK_SIZE * alloc_num, MS_BLOCK_SIZE,
                                (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE),
                                alloc_num == 1 ? "major heap section" : NULL, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
                        if (p)
                                break;
                        alloc_num >>= 1;
                }

                for (i = 0; i < alloc_num; ++i) {
                        block = p;
                        /*
                         * We do the free list update one after the
                         * other so that other threads can use the new
                         * blocks as quickly as possible.
                         */
                        do {
                                empty = empty_blocks;
                                *(void**)block = empty;
                        } while (SGEN_CAS_PTR ((gpointer*)&empty_blocks, block, empty) != empty);
                        p += MS_BLOCK_SIZE;
                }

                SGEN_ATOMIC_ADD_P (num_empty_blocks, alloc_num);

                stat_major_blocks_alloced += alloc_num;
#if SIZEOF_VOID_P != 8
                if (alloc_num != MS_BLOCK_ALLOC_NUM)
                        stat_major_blocks_alloced_less_ideal += alloc_num;
#endif
        }

        do {
                empty = empty_blocks;
                if (!empty)
                        goto retry;
                block = empty;
                next = *(void**)block;
        } while (SGEN_CAS_PTR (&empty_blocks, next, empty) != empty);

        SGEN_ATOMIC_ADD_P (num_empty_blocks, -1);

        *(void**)block = NULL;

        g_assert (!((mword)block & (MS_BLOCK_SIZE - 1)));

        return block;
}

/*
 * This doesn't actually free a block immediately, but enqueues it into the `empty_blocks`
 * list, where it will either be freed later on, or reused in nursery collections.
 */
static void
ms_free_block (MSBlockInfo *info)
{
        void *empty;
        char *block = MS_BLOCK_FOR_BLOCK_INFO (info);

        sgen_memgov_release_space (MS_BLOCK_SIZE, SPACE_MAJOR);
        if (info->cardtable_mod_union)
                sgen_card_table_free_mod_union (info->cardtable_mod_union, block, MS_BLOCK_SIZE);
        memset (block, 0, MS_BLOCK_SIZE);

        do {
                empty = empty_blocks;
                *(void**)block = empty;
        } while (SGEN_CAS_PTR (&empty_blocks, block, empty) != empty);

        SGEN_ATOMIC_ADD_P (num_empty_blocks, 1);

        binary_protocol_block_free (block, MS_BLOCK_SIZE);
}

static gboolean
sweep_in_progress (void)
{
        int state = sweep_state;
        return state == SWEEP_STATE_SWEEPING ||
                state == SWEEP_STATE_SWEEPING_AND_ITERATING ||
                state == SWEEP_STATE_COMPACTING;
}

static inline gboolean
block_is_swept_or_marking (MSBlockInfo *block)
{
        gint32 state = block->state;
        return state == BLOCK_STATE_SWEPT || state == BLOCK_STATE_MARKING;
}

//#define MARKSWEEP_CONSISTENCY_CHECK

#ifdef MARKSWEEP_CONSISTENCY_CHECK
static void
check_block_free_list (MSBlockInfo *block, int size, gboolean pinned)
{
        SGEN_ASSERT (0, !sweep_in_progress (), "Can't examine allocated blocks during sweep");
        for (; block; block = block->next_free) {
                SGEN_ASSERT (0, block->state != BLOCK_STATE_CHECKING, "Can't have a block we're checking in a free list.");
                g_assert (block->obj_size == size);
                g_assert ((pinned && block->pinned) || (!pinned && !block->pinned));

                /* blocks in the free lists must have at least
                   one free slot */
                g_assert (block->free_list);

                /* the block must be in the allocated_blocks array */
                g_assert (sgen_array_list_find (&allocated_blocks, BLOCK_TAG (block)) != (guint32)-1);
        }
}

static void
check_empty_blocks (void)
{
        void *p;
        size_t i = 0;
        for (p = empty_blocks; p; p = *(void**)p)
                ++i;
        g_assert (i == num_empty_blocks);
}

static void
consistency_check (void)
{
        MSBlockInfo *block;
        int i;

        /* check all blocks */
        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int num_free = 0;
                void **free;

                /* count number of free slots */
                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
                        if (!MS_OBJ_ALLOCED (obj, block))
                                ++num_free;
                }

                /* check free list */
                for (free = block->free_list; free; free = (void**)*free) {
                        g_assert (MS_BLOCK_FOR_OBJ (free) == block);
                        --num_free;
                }
                g_assert (num_free == 0);

                /* check all mark words are zero */
                if (!sgen_concurrent_collection_in_progress () && block_is_swept_or_marking (block)) {
                        for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
                                g_assert (block->mark_words [i] == 0);
                }
        } END_FOREACH_BLOCK_NO_LOCK;

        /* check free blocks */
        for (i = 0; i < num_block_obj_sizes; ++i) {
                int j;
                for (j = 0; j < MS_BLOCK_TYPE_MAX; ++j)
                        check_block_free_list (free_block_lists [j][i], block_obj_sizes [i], j & MS_BLOCK_FLAG_PINNED);
        }

        check_empty_blocks ();
}
#endif

static void
add_free_block (MSBlockInfo * volatile *free_blocks, int size_index, MSBlockInfo *block)
{
        MSBlockInfo *old;
        do {
                block->next_free = old = free_blocks [size_index];
        } while (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], block, old) != old);
}

static void major_finish_sweep_checking (void);

static gboolean
ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
{
        int size = block_obj_sizes [size_index];
        int count = MS_BLOCK_FREE / size;
        MSBlockInfo *info;
        MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (pinned, has_references);
        char *obj_start;
        int i;

        if (!sgen_memgov_try_alloc_space (MS_BLOCK_SIZE, SPACE_MAJOR))
                return FALSE;

        info = (MSBlockInfo*)ms_get_empty_block ();

        SGEN_ASSERT (9, count >= 2, "block with %d objects, it must hold at least 2", count);

        info->obj_size = size;
        info->obj_size_index = size_index;
        info->pinned = pinned;
        info->has_references = has_references;
        info->has_pinned = pinned;
        /*
         * Blocks that are to-space are not evacuated from.  During a major collection
         * blocks are allocated for two reasons: evacuating objects from the nursery and
         * evacuating them from major blocks marked for evacuation.  In both cases we don't
         * want further evacuation. We also don't want to evacuate objects allocated during
         * the concurrent mark since it would add pointless stress on the finishing pause.
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD) || sgen_concurrent_collection_in_progress ();
        info->state = info->is_to_space ? BLOCK_STATE_MARKING : BLOCK_STATE_SWEPT;
        SGEN_ASSERT (6, !sweep_in_progress () || info->state == BLOCK_STATE_SWEPT, "How do we add a new block to be swept while sweeping?");
        info->cardtable_mod_union = NULL;

        update_heap_boundaries_for_block (info);

        binary_protocol_block_alloc (info, MS_BLOCK_SIZE);

        /* build free list */
        obj_start = MS_BLOCK_FOR_BLOCK_INFO (info) + MS_BLOCK_SKIP;
        info->free_list = (void**)obj_start;
        /* we're skipping the last one - it must be nulled */
        for (i = 0; i < count - 1; ++i) {
                char *next_obj_start = obj_start + size;
                *(void**)obj_start = next_obj_start;
                obj_start = next_obj_start;
        }
        /* the last one */
        *(void**)obj_start = NULL;

        add_free_block (free_blocks, size_index, info);

        sgen_array_list_add (&allocated_blocks, BLOCK_TAG (info), 0, FALSE);

        SGEN_ATOMIC_ADD_P (num_major_sections, 1);
        return TRUE;
}
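
/*
 * The resulting layout of a fresh block (a sketch; count is
 * MS_BLOCK_FREE / obj_size):
 *
 *     +---------------+---------+--------+--------+-----+--------------+
 *     | MSBlockHeader | padding | slot 0 | slot 1 | ... | slot count-1 |
 *     +---------------+---------+--------+--------+-----+--------------+
 *     <--- MS_BLOCK_SKIP ------>^ free_list points here
 *
 * Each free slot's first word points to the next slot and the last one is
 * NULL, so unlink_slot_from_free_list_uncontested () hands out slots in
 * address order.
 */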

static gboolean
ptr_is_in_major_block (char *ptr, char **start, gboolean *pinned)
{
        MSBlockInfo *block;

        FOREACH_BLOCK_NO_LOCK (block) {
                if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) {
                        int count = MS_BLOCK_FREE / block->obj_size;
                        int i;

                        if (start)
                                *start = NULL;
                        for (i = 0; i <= count; ++i) {
                                if (ptr >= (char*)MS_BLOCK_OBJ (block, i) && ptr < (char*)MS_BLOCK_OBJ (block, i + 1)) {
                                        if (start)
                                                *start = (char *)MS_BLOCK_OBJ (block, i);
                                        break;
                                }
                        }
                        if (pinned)
                                *pinned = block->pinned;
                        return TRUE;
                }
        } END_FOREACH_BLOCK_NO_LOCK;
        return FALSE;
}

static gboolean
ptr_is_from_pinned_alloc (char *ptr)
{
        gboolean pinned;
        if (ptr_is_in_major_block (ptr, NULL, &pinned))
                return pinned;
        return FALSE;
}

static void
ensure_can_access_block_free_list (MSBlockInfo *block)
{
 retry:
        for (;;) {
                switch (block->state) {
                case BLOCK_STATE_SWEPT:
                case BLOCK_STATE_MARKING:
                        return;
                case BLOCK_STATE_CHECKING:
                        SGEN_ASSERT (0, FALSE, "How did we get a block that's being checked from a free list?");
                        break;
                case BLOCK_STATE_NEED_SWEEPING:
                        if (sweep_block (block))
                                ++stat_major_blocks_lazy_swept;
                        break;
                case BLOCK_STATE_SWEEPING:
                        /* FIXME: do this more elegantly */
                        g_usleep (100);
                        goto retry;
                default:
                        SGEN_ASSERT (0, FALSE, "Illegal block state");
                        break;
                }
        }
}

static void*
unlink_slot_from_free_list_uncontested (MSBlockInfo * volatile *free_blocks, int size_index)
{
        MSBlockInfo *block, *next_free_block;
        void *obj, *next_free_slot;

 retry:
        block = free_blocks [size_index];
        SGEN_ASSERT (9, block, "no free block to unlink from free_blocks %p size_index %d", free_blocks, size_index);

        ensure_can_access_block_free_list (block);

        obj = block->free_list;
        SGEN_ASSERT (6, obj, "block %p in free list had no available object to alloc from", block);

        next_free_slot = *(void**)obj;
        if (next_free_slot) {
                block->free_list = (gpointer *)next_free_slot;
                return obj;
        }

        next_free_block = block->next_free;
        if (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], next_free_block, block) != block)
                goto retry;

        block->free_list = NULL;
        block->next_free = NULL;

        return obj;
}

static GCObject*
alloc_obj (GCVTable vtable, size_t size, gboolean pinned, gboolean has_references)
{
        int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
        MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (pinned, has_references);
        void *obj;

        if (!free_blocks [size_index]) {
                if (G_UNLIKELY (!ms_alloc_block (size_index, pinned, has_references)))
                        return NULL;
        }

        obj = unlink_slot_from_free_list_uncontested (free_blocks, size_index);

        /* FIXME: assumes object layout */
        *(GCVTable*)obj = vtable;

        total_allocated_major += block_obj_sizes [size_index];

        return (GCObject *)obj;
}

static GCObject*
major_alloc_object (GCVTable vtable, size_t size, gboolean has_references)
{
        return alloc_obj (vtable, size, FALSE, has_references);
}

/*
 * We're not freeing the block if it's empty.  We leave that work for
 * the next major collection.
 *
 * This is just called from the domain clearing code, which runs in a
 * single thread and has the GC lock, so we don't need an extra lock.
 */
static void
free_object (GCObject *obj, size_t size, gboolean pinned)
{
        MSBlockInfo *block = MS_BLOCK_FOR_OBJ (obj);
        int word, bit;
        gboolean in_free_list;

        SGEN_ASSERT (9, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");

        ensure_can_access_block_free_list (block);
        SGEN_ASSERT (9, (pinned && block->pinned) || (!pinned && !block->pinned), "free-object pinning mixup object %p pinned %d block %p pinned %d", obj, pinned, block, block->pinned);
        SGEN_ASSERT (9, MS_OBJ_ALLOCED (obj, block), "object %p is already free", obj);
        MS_CALC_MARK_BIT (word, bit, obj);
        SGEN_ASSERT (9, !MS_MARK_BIT (block, word, bit), "object %p has mark bit set", obj);

        memset (obj, 0, size);

        in_free_list = !!block->free_list;
        *(void**)obj = block->free_list;
        block->free_list = (void**)obj;

        if (!in_free_list) {
                MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (pinned, block->has_references);
                int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
                SGEN_ASSERT (9, !block->next_free, "block %p doesn't have a free-list of object but belongs to a free-list of blocks", block);
                add_free_block (free_blocks, size_index, block);
        }
}

static void
major_free_non_pinned_object (GCObject *obj, size_t size)
{
        free_object (obj, size, FALSE);
}

/* size is a multiple of SGEN_ALLOC_ALIGN */
static GCObject*
major_alloc_small_pinned_obj (GCVTable vtable, size_t size, gboolean has_references)
{
        void *res;

        res = alloc_obj (vtable, size, TRUE, has_references);
        /*
         * If we failed to alloc memory, we had better try releasing some,
         * as pinned alloc is requested by the runtime.
         */
        if (!res) {
                sgen_perform_collection (0, GENERATION_OLD, "pinned alloc failure", TRUE, TRUE);
                res = alloc_obj (vtable, size, TRUE, has_references);
        }
        return (GCObject *)res;
}

static void
free_pinned_object (GCObject *obj, size_t size)
{
        free_object (obj, size, TRUE);
}

/*
 * size is already rounded up and we hold the GC lock.
 */
static GCObject*
major_alloc_degraded (GCVTable vtable, size_t size)
{
        GCObject *obj;

        obj = alloc_obj (vtable, size, FALSE, SGEN_VTABLE_HAS_REFERENCES (vtable));
        if (G_LIKELY (obj)) {
                HEAVY_STAT (++stat_objects_alloced_degraded);
                HEAVY_STAT (stat_bytes_alloced_degraded += size);
        }
        return obj;
}

/*
 * obj is some object.  If it's not in the major heap (i.e. if it's in
 * the nursery or LOS), return FALSE.  Otherwise return whether it's
 * been marked or copied.
 */
static gboolean
major_is_object_live (GCObject *obj)
{
        MSBlockInfo *block;
        int word, bit;
        mword objsize;

        if (sgen_ptr_in_nursery (obj))
                return FALSE;

        objsize = SGEN_ALIGN_UP (sgen_safe_object_get_size (obj));

        /* LOS */
        if (objsize > SGEN_MAX_SMALL_OBJ_SIZE)
                return FALSE;

        /* now we know it's in a major block */
        block = MS_BLOCK_FOR_OBJ (obj);
        SGEN_ASSERT (9, !block->pinned, "block %p is pinned, BTW why is this bad?", block);
        MS_CALC_MARK_BIT (word, bit, obj);
        return MS_MARK_BIT (block, word, bit) ? TRUE : FALSE;
}

static gboolean
major_ptr_is_in_non_pinned_space (char *ptr, char **start)
{
        gboolean pinned;
        if (ptr_is_in_major_block (ptr, start, &pinned))
                return !pinned;
        return FALSE;
}

static gboolean
try_set_sweep_state (int new_, int expected)
{
        int old = SGEN_CAS (&sweep_state, new_, expected);
        return old == expected;
}

static void
set_sweep_state (int new_, int expected)
{
        gboolean success = try_set_sweep_state (new_, expected);
        SGEN_ASSERT (0, success, "Could not set sweep state.");
}

static gboolean ensure_block_is_checked_for_sweeping (guint32 block_index, gboolean wait, gboolean *have_checked);

static SgenThreadPoolJob * volatile sweep_job;
static SgenThreadPoolJob * volatile sweep_blocks_job;

static void
major_finish_sweep_checking (void)
{
        guint32 block_index;
        SgenThreadPoolJob *job;

 retry:
        switch (sweep_state) {
        case SWEEP_STATE_SWEPT:
        case SWEEP_STATE_NEED_SWEEPING:
                return;
        case SWEEP_STATE_SWEEPING:
                if (try_set_sweep_state (SWEEP_STATE_SWEEPING_AND_ITERATING, SWEEP_STATE_SWEEPING))
                        break;
                goto retry;
        case SWEEP_STATE_SWEEPING_AND_ITERATING:
                SGEN_ASSERT (0, FALSE, "Is there another minor collection running?");
                goto retry;
        case SWEEP_STATE_COMPACTING:
                goto wait;
        default:
                SGEN_ASSERT (0, FALSE, "Invalid sweep state.");
                break;
        }

        /*
         * We're running with the world stopped and the only other thread doing work is the
         * sweep thread, which doesn't add blocks to the array, so we can safely access
         * `next_slot`.
         */
        for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index)
                ensure_block_is_checked_for_sweeping (block_index, FALSE, NULL);

        set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_SWEEPING_AND_ITERATING);

 wait:
        job = sweep_job;
        if (job)
                sgen_thread_pool_job_wait (job);
        SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?");
        SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?");
}

static void
major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc callback, void *data)
{
        gboolean sweep = flags & ITERATE_OBJECTS_SWEEP;
        gboolean non_pinned = flags & ITERATE_OBJECTS_NON_PINNED;
        gboolean pinned = flags & ITERATE_OBJECTS_PINNED;
        MSBlockInfo *block;

        /* No actual sweeping will take place if we are in the middle of a major collection. */
        major_finish_sweep_checking ();
        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;

                if (block->pinned && !pinned)
                        continue;
                if (!block->pinned && !non_pinned)
                        continue;
                if (sweep && lazy_sweep && !block_is_swept_or_marking (block)) {
                        sweep_block (block);
                        SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
                }

                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
                        if (MS_OBJ_ALLOCED (obj, block))
                                callback ((GCObject*)obj, block->obj_size, data);
                }
        } END_FOREACH_BLOCK_NO_LOCK;
}

static gboolean
major_is_valid_object (char *object)
{
        MSBlockInfo *block;

        FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;

                if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= object))
                        continue;

                idx = MS_BLOCK_OBJ_INDEX (object, block);
                obj = (char*)MS_BLOCK_OBJ (block, idx);
                if (obj != object)
                        return FALSE;
                return MS_OBJ_ALLOCED (obj, block);
        } END_FOREACH_BLOCK_NO_LOCK;

        return FALSE;
}

static GCVTable
major_describe_pointer (char *ptr)
{
        MSBlockInfo *block;

        FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;
                gboolean live;
                GCVTable vtable;
                int w, b;
                gboolean marked;

                if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= ptr))
                        continue;

                SGEN_LOG (0, "major-ptr (block %p sz %d pin %d ref %d)\n",
                        MS_BLOCK_FOR_BLOCK_INFO (block), block->obj_size, block->pinned, block->has_references);

                idx = MS_BLOCK_OBJ_INDEX (ptr, block);
                obj = (char*)MS_BLOCK_OBJ (block, idx);
                live = MS_OBJ_ALLOCED (obj, block);
                vtable = live ? SGEN_LOAD_VTABLE ((GCObject*)obj) : NULL;

                MS_CALC_MARK_BIT (w, b, obj);
                marked = MS_MARK_BIT (block, w, b);

                if (obj == ptr) {
                        SGEN_LOG (0, "\t(");
                        if (live)
                                SGEN_LOG (0, "object");
                        else
                                SGEN_LOG (0, "dead-object");
                } else {
                        if (live)
                                SGEN_LOG (0, "interior-ptr offset %zd", ptr - obj);
                        else
                                SGEN_LOG (0, "dead-interior-ptr offset %zd", ptr - obj);
                }

                SGEN_LOG (0, " marked %d)\n", marked ? 1 : 0);

                return vtable;
        } END_FOREACH_BLOCK_NO_LOCK;

        return NULL;
}

static void
major_check_scan_starts (void)
{
}

static void
major_dump_heap (FILE *heap_dump_file)
{
        MSBlockInfo *block;
        int *slots_available = (int *)alloca (sizeof (int) * num_block_obj_sizes);
        int *slots_used = (int *)alloca (sizeof (int) * num_block_obj_sizes);
        int i;

        for (i = 0; i < num_block_obj_sizes; ++i)
                slots_available [i] = slots_used [i] = 0;

        FOREACH_BLOCK_NO_LOCK (block) {
                int index = ms_find_block_obj_size_index (block->obj_size);
                int count = MS_BLOCK_FREE / block->obj_size;

                slots_available [index] += count;
                for (i = 0; i < count; ++i) {
                        if (MS_OBJ_ALLOCED (MS_BLOCK_OBJ (block, i), block))
                                ++slots_used [index];
                }
        } END_FOREACH_BLOCK_NO_LOCK;

        fprintf (heap_dump_file, "<occupancies>\n");
        for (i = 0; i < num_block_obj_sizes; ++i) {
                fprintf (heap_dump_file, "<occupancy size=\"%d\" available=\"%d\" used=\"%d\" />\n",
                                block_obj_sizes [i], slots_available [i], slots_used [i]);
        }
        fprintf (heap_dump_file, "</occupancies>\n");

        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
                int start = -1;

                fprintf (heap_dump_file, "<section type=\"%s\" size=\"%zu\">\n", "old", (size_t)MS_BLOCK_FREE);

                for (i = 0; i <= count; ++i) {
                        if ((i < count) && MS_OBJ_ALLOCED (MS_BLOCK_OBJ (block, i), block)) {
                                if (start < 0)
                                        start = i;
                        } else {
                                if (start >= 0) {
                                        sgen_dump_occupied ((char *)MS_BLOCK_OBJ (block, start), (char *)MS_BLOCK_OBJ (block, i), MS_BLOCK_FOR_BLOCK_INFO (block));
                                        start = -1;
                                }
                        }
                }

                fprintf (heap_dump_file, "</section>\n");
        } END_FOREACH_BLOCK_NO_LOCK;
}

static guint8*
get_cardtable_mod_union_for_block (MSBlockInfo *block, gboolean allocate)
{
        guint8 *mod_union = block->cardtable_mod_union;
        guint8 *other;
        if (mod_union)
                return mod_union;
        else if (!allocate)
                return NULL;
        mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
        other = (guint8 *)SGEN_CAS_PTR ((gpointer*)&block->cardtable_mod_union, mod_union, NULL);
        if (!other) {
                SGEN_ASSERT (0, block->cardtable_mod_union == mod_union, "Why did CAS not replace?");
                return mod_union;
        }
        sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
        return other;
}

static inline guint8*
major_get_cardtable_mod_union_for_reference (char *ptr)
{
        MSBlockInfo *block = MS_BLOCK_FOR_OBJ (ptr);
        size_t offset = sgen_card_table_get_card_offset (ptr, (char*)sgen_card_table_align_pointer (MS_BLOCK_FOR_BLOCK_INFO (block)));
        guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE);
        SGEN_ASSERT (0, mod_union, "FIXME: optionally allocate the mod union if it's not here and CAS it in.");
        return &mod_union [offset];
}

/*
 * Mark the mod-union card for `ptr`, which must be a reference within the object `obj`.
 */
static void
mark_mod_union_card (GCObject *obj, void **ptr, GCObject *value_obj)
{
        int type = sgen_obj_get_descriptor (obj) & DESC_TYPE_MASK;
        if (sgen_safe_object_is_small (obj, type)) {
                guint8 *card_byte = major_get_cardtable_mod_union_for_reference ((char*)ptr);
                SGEN_ASSERT (0, MS_BLOCK_FOR_OBJ (obj) == MS_BLOCK_FOR_OBJ (ptr), "How can an object and a reference inside it not be in the same block?");
                *card_byte = 1;
        } else {
                sgen_los_mark_mod_union_card (obj, ptr);
        }
        binary_protocol_mod_union_remset (obj, ptr, value_obj, SGEN_LOAD_VTABLE (value_obj));
}

static inline gboolean
major_block_is_evacuating (MSBlockInfo *block)
{
        if (evacuate_block_obj_sizes [block->obj_size_index] &&
                        !block->has_pinned &&
                        !block->is_to_space)
                return TRUE;
        return FALSE;
}

#define MS_MARK_OBJECT_AND_ENQUEUE(obj,desc,block,queue) do {           \
                int __word, __bit;                                      \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
                SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj); \
                if (!MS_MARK_BIT ((block), __word, __bit)) {            \
                        MS_SET_MARK_BIT ((block), __word, __bit);       \
                        if (sgen_gc_descr_has_references (desc))                        \
                                GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
                        binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)

static void
pin_major_object (GCObject *obj, SgenGrayQueue *queue)
{
        MSBlockInfo *block;

        if (concurrent_mark)
                g_assert_not_reached ();

        block = MS_BLOCK_FOR_OBJ (obj);
        block->has_pinned = TRUE;
        MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
}

#include "sgen-major-copy-object.h"

static long long
major_get_and_reset_num_major_objects_marked (void)
{
#ifdef SGEN_COUNT_NUMBER_OF_MAJOR_OBJECTS_MARKED
        long long num = num_major_objects_marked;
        num_major_objects_marked = 0;
        return num;
#else
        return 0;
#endif
}

#define PREFETCH_CARDS          1       /* BOOL FASTENABLE */
#if !PREFETCH_CARDS
#undef PREFETCH_CARDS
#endif

/* gcc 4.2.1 from xcode4 crashes on sgen_card_table_get_card_address () when this is enabled */
#if defined(PLATFORM_MACOSX)
#if MONO_GNUC_VERSION <= 40300
#undef PREFETCH_CARDS
#endif
#endif

#ifdef HEAVY_STATISTICS
static guint64 stat_optimized_copy;
static guint64 stat_optimized_copy_nursery;
static guint64 stat_optimized_copy_nursery_forwarded;
static guint64 stat_optimized_copy_nursery_pinned;
static guint64 stat_optimized_copy_major;
static guint64 stat_optimized_copy_major_small_fast;
static guint64 stat_optimized_copy_major_small_slow;
static guint64 stat_optimized_copy_major_large;
static guint64 stat_optimized_copy_major_forwarded;
static guint64 stat_optimized_copy_major_small_evacuate;
static guint64 stat_optimized_major_scan;
static guint64 stat_optimized_major_scan_no_refs;

static guint64 stat_drain_prefetch_fills;
static guint64 stat_drain_prefetch_fill_failures;
static guint64 stat_drain_loops;
#endif

#define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_no_evacuation
#define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_no_evacuation
#define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_no_evacuation
#include "sgen-marksweep-drain-gray-stack.h"

#define COPY_OR_MARK_WITH_EVACUATION
#define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_with_evacuation
#define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_with_evacuation
#define SCAN_VTYPE_FUNCTION_NAME        major_scan_vtype_with_evacuation
#define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_with_evacuation
#define SCAN_PTR_FIELD_FUNCTION_NAME    major_scan_ptr_field_with_evacuation
#include "sgen-marksweep-drain-gray-stack.h"

#define COPY_OR_MARK_CONCURRENT
#define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_concurrent_no_evacuation
#define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_concurrent_no_evacuation
#define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_concurrent_no_evacuation
#include "sgen-marksweep-drain-gray-stack.h"

#define COPY_OR_MARK_CONCURRENT_WITH_EVACUATION
#define COPY_OR_MARK_FUNCTION_NAME      major_copy_or_mark_object_concurrent_with_evacuation
#define SCAN_OBJECT_FUNCTION_NAME       major_scan_object_concurrent_with_evacuation
#define SCAN_VTYPE_FUNCTION_NAME        major_scan_vtype_concurrent_with_evacuation
#define SCAN_PTR_FIELD_FUNCTION_NAME    major_scan_ptr_field_concurrent_with_evacuation
#define DRAIN_GRAY_STACK_FUNCTION_NAME  drain_gray_stack_concurrent_with_evacuation
#include "sgen-marksweep-drain-gray-stack.h"

static inline gboolean
major_is_evacuating (void)
{
        int i;
        for (i = 0; i < num_block_obj_sizes; ++i) {
                if (evacuate_block_obj_sizes [i]) {
                        return TRUE;
                }
        }

        return FALSE;
}

static gboolean
drain_gray_stack (SgenGrayQueue *queue)
{
        if (major_is_evacuating ())
                return drain_gray_stack_with_evacuation (queue);
        else
                return drain_gray_stack_no_evacuation (queue);
}

static gboolean
drain_gray_stack_concurrent (SgenGrayQueue *queue)
{
        if (major_is_evacuating ())
                return drain_gray_stack_concurrent_with_evacuation (queue);
        else
                return drain_gray_stack_concurrent_no_evacuation (queue);
}

static void
major_copy_or_mark_object_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
        major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
}

static void
major_copy_or_mark_object_concurrent_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
        major_copy_or_mark_object_concurrent_with_evacuation (ptr, *ptr, queue);
}

static void
major_copy_or_mark_object_concurrent_finish_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
        major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
}

static void
mark_pinned_objects_in_block (MSBlockInfo *block, size_t first_entry, size_t last_entry, SgenGrayQueue *queue)
{
        void **entry, **end;
        int last_index = -1;

        if (first_entry == last_entry)
                return;

        entry = sgen_pinning_get_entry (first_entry);
        end = sgen_pinning_get_entry (last_entry);

        for (; entry < end; ++entry) {
                int index = MS_BLOCK_OBJ_INDEX (*entry, block);
                GCObject *obj;
                SGEN_ASSERT (9, index >= 0 && index < MS_BLOCK_FREE / block->obj_size, "invalid object %p index %d max-index %d", *entry, index, (int)(MS_BLOCK_FREE / block->obj_size));
                if (index == last_index)
                        continue;
                obj = MS_BLOCK_OBJ (block, index);
                if (!MS_OBJ_ALLOCED (obj, block))
                        continue;
                MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
                sgen_pin_stats_register_object (obj, GENERATION_OLD);
                last_index = index;
        }

        /*
         * There might have been potential pinning "pointers" into this block, but if none
         * of them pointed to occupied slots, we don't have to pin the block.
         */
        if (last_index >= 0)
                block->has_pinned = TRUE;
}

static inline void
sweep_block_for_size (MSBlockInfo *block, int count, int obj_size)
{
        int obj_index;

        for (obj_index = 0; obj_index < count; ++obj_index) {
                int word, bit;
                void *obj = MS_BLOCK_OBJ_FOR_SIZE (block, obj_index, obj_size);

                MS_CALC_MARK_BIT (word, bit, obj);
                if (MS_MARK_BIT (block, word, bit)) {
                        SGEN_ASSERT (9, MS_OBJ_ALLOCED (obj, block), "object %p not allocated", obj);
                } else {
                        /* an unmarked object */
                        if (MS_OBJ_ALLOCED (obj, block)) {
                                /*
                                 * FIXME: Merge consecutive
                                 * slots for lower reporting
                                 * overhead.  Maybe memset
                                 * will also benefit?
                                 */
                                binary_protocol_empty (obj, obj_size);
                                memset (obj, 0, obj_size);
                        }
                        *(void**)obj = block->free_list;
                        block->free_list = (void **)obj;
                }
        }
}

static inline gboolean
try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
{
        gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state);
        gboolean success = old_state == expected_state;
        if (success)
                binary_protocol_block_set_state (block, MS_BLOCK_SIZE, old_state, new_state);
        return success;
}

static inline void
set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
{
        SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
        block->state = new_state;
        binary_protocol_block_set_state (block, MS_BLOCK_SIZE, expected_state, new_state);
}

/*
 * If `block` needs sweeping, sweep it and return TRUE.  Otherwise return FALSE.
 *
 * Sweeping means iterating through the block's slots and building the free-list from the
 * unmarked ones.  They will also be zeroed.  The mark bits will be reset.
 */
static gboolean
sweep_block (MSBlockInfo *block)
{
        int count;
        void *reversed = NULL;

 retry:
        switch (block->state) {
        case BLOCK_STATE_SWEPT:
                return FALSE;
        case BLOCK_STATE_MARKING:
        case BLOCK_STATE_CHECKING:
                SGEN_ASSERT (0, FALSE, "How did we get to sweep a block that's being marked or being checked?");
                goto retry;
        case BLOCK_STATE_SWEEPING:
                /* FIXME: Do this more elegantly */
                g_usleep (100);
                goto retry;
        case BLOCK_STATE_NEED_SWEEPING:
                if (!try_set_block_state (block, BLOCK_STATE_SWEEPING, BLOCK_STATE_NEED_SWEEPING))
                        goto retry;
                break;
        default:
                SGEN_ASSERT (0, FALSE, "Illegal block state");
        }

        SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");

        count = MS_BLOCK_FREE / block->obj_size;

        block->free_list = NULL;

        /* Use inline instances specialized to constant sizes; this allows the compiler to replace the memset calls with inline code */
        // FIXME: Add more sizes
        switch (block->obj_size) {
        case 16:
                sweep_block_for_size (block, count, 16);
                break;
        default:
                sweep_block_for_size (block, count, block->obj_size);
                break;
        }

        /* reset mark bits */
        memset (block->mark_words, 0, sizeof (guint32) * MS_NUM_MARK_WORDS);

        /* Reverse free list so that it's in address order */
        reversed = NULL;
        while (block->free_list) {
                void *next = *(void**)block->free_list;
                *(void**)block->free_list = reversed;
                reversed = block->free_list;
                block->free_list = (void **)next;
        }
        block->free_list = (void **)reversed;

        mono_memory_write_barrier ();

        set_block_state (block, BLOCK_STATE_SWEPT, BLOCK_STATE_SWEEPING);

        return TRUE;
}
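
/*
 * Illustration of the reversal above (a sketch): sweep_block_for_size ()
 * pushes free slots onto the list head in increasing address order, so
 * after sweeping slots A < B < C the list reads C -> B -> A.  The loop
 * re-threads it to A -> B -> C, so allocation from a swept block proceeds
 * in address order.
 */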

static inline int
bitcount (mword d)
{
        int count = 0;

#ifdef __GNUC__
        if (sizeof (mword) == 8)
                count += __builtin_popcountll (d);
        else
                count += __builtin_popcount (d);
#else
        while (d) {
                count ++;
                d &= (d - 1);
        }
#endif
        return count;
}
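
/*
 * E.g. bitcount (0xf0) == 4.  ensure_block_is_checked_for_sweeping ()
 * below uses this to count a block's live (marked) objects directly from
 * its mark words.
 */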

/* statistics for evacuation */
static size_t *sweep_slots_available;
static size_t *sweep_slots_used;
static size_t *sweep_num_blocks;

static volatile size_t num_major_sections_before_sweep;
static volatile size_t num_major_sections_freed_in_sweep;

static void
sweep_start (void)
{
        int i;

        for (i = 0; i < num_block_obj_sizes; ++i)
                sweep_slots_available [i] = sweep_slots_used [i] = sweep_num_blocks [i] = 0;

        /* clear all the free lists */
        for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i) {
                MSBlockInfo * volatile *free_blocks = free_block_lists [i];
                int j;
                for (j = 0; j < num_block_obj_sizes; ++j)
                        free_blocks [j] = NULL;
        }
}
1416
1417 static void sweep_finish (void);
1418
1419 /*
1420  * If `wait` is TRUE and the block is currently being checked, this function will wait until
1421  * the checking has finished.
1422  *
1423  * Returns whether the block is still there.  If `wait` is FALSE, the return value is not
1424  * reliable and must not be used.
1425  */
1426 static gboolean
1427 ensure_block_is_checked_for_sweeping (guint32 block_index, gboolean wait, gboolean *have_checked)
1428 {
1429         int count;
1430         gboolean have_live = FALSE;
1431         gboolean have_free = FALSE;
1432         int nused = 0;
1433         int block_state;
1434         int i;
1435         void *tagged_block;
1436         MSBlockInfo *block;
1437         volatile gpointer *block_slot = sgen_array_list_get_slot (&allocated_blocks, block_index);
1438
1439         SGEN_ASSERT (6, sweep_in_progress (), "Why do we call this function if there's no sweep in progress?");
1440
1441         if (have_checked)
1442                 *have_checked = FALSE;
1443
1444  retry:
1445         tagged_block = *(void * volatile *)block_slot;
1446         if (!tagged_block)
1447                 return FALSE;
1448
1449         if (BLOCK_IS_TAGGED_CHECKING (tagged_block)) {
1450                 if (!wait)
1451                         return FALSE;
1452                 /* FIXME: do this more elegantly */
1453                 g_usleep (100);
1454                 goto retry;
1455         }
1456
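             /* Atomically tag the slot as CHECKING; if another thread changed the slot in the meantime, start over. */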
1457         if (SGEN_CAS_PTR (block_slot, BLOCK_TAG_CHECKING (tagged_block), tagged_block) != tagged_block)
1458                 goto retry;
1459
1460         block = BLOCK_UNTAG (tagged_block);
1461         block_state = block->state;
1462
1463         if (!sweep_in_progress ()) {
1464                 SGEN_ASSERT (6, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
1465                 if (!lazy_sweep)
1466                         SGEN_ASSERT (6, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
1467         }
1468
1469         switch (block_state) {
1470         case BLOCK_STATE_SWEPT:
1471         case BLOCK_STATE_NEED_SWEEPING:
1472         case BLOCK_STATE_SWEEPING:
1473                 goto done;
1474         case BLOCK_STATE_MARKING:
1475                 break;
1476         case BLOCK_STATE_CHECKING:
1477                 SGEN_ASSERT (0, FALSE, "We set the CHECKING bit - how can the stage be CHECKING?");
1478                 goto done;
1479         default:
1480                 SGEN_ASSERT (0, FALSE, "Illegal block state");
1481                 break;
1482         }
1483
1484         SGEN_ASSERT (6, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
1485         set_block_state (block, BLOCK_STATE_CHECKING, BLOCK_STATE_MARKING);
1486
1487         if (have_checked)
1488                 *have_checked = TRUE;
1489
1490         block->has_pinned = block->pinned;
1491
1492         block->is_to_space = FALSE;
1493
1494         count = MS_BLOCK_FREE / block->obj_size;
1495
1496         if (block->cardtable_mod_union)
1497                 memset (block->cardtable_mod_union, 0, CARDS_PER_BLOCK);
1498
1499         /* Count marked objects in the block */
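             /* Each live object sets exactly one mark bit, so summing the popcounts of the mark words gives the number of used slots. */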
1500         for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
1501                 nused += bitcount (block->mark_words [i]);
1502
1503         block->nused = nused;
1504         if (nused)
1505                 have_live = TRUE;
1506         if (nused < count)
1507                 have_free = TRUE;
1508
1509         if (have_live) {
1510                 int obj_size_index = block->obj_size_index;
1511                 gboolean has_pinned = block->has_pinned;
1512
1513                 set_block_state (block, BLOCK_STATE_NEED_SWEEPING, BLOCK_STATE_CHECKING);
1514
1515                 /*
1516                  * FIXME: Go straight to SWEPT if there are no free slots.  We need
1517                  * to set the free slot list to NULL, though, and maybe update some
1518                  * statistics.
1519                  */
1520                 if (!lazy_sweep)
1521                         sweep_block (block);
1522
1523                 if (!has_pinned) {
1524                         ++sweep_num_blocks [obj_size_index];
1525                         sweep_slots_used [obj_size_index] += nused;
1526                         sweep_slots_available [obj_size_index] += count;
1527                 }
1528
1529                 /*
1530                  * If there are free slots in the block, add
1531                  * the block to the corresponding free list.
1532                  */
1533                 if (have_free) {
1534                         MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (block->pinned, block->has_references);
1535
1536                         if (!lazy_sweep)
1537                                 SGEN_ASSERT (6, block->free_list, "How do we not have a free list when there are free slots?");
1538
1539                         add_free_block (free_blocks, obj_size_index, block);
1540                 }
1541
1542                 /* FIXME: Do we need the heap boundaries while we do nursery collections? */
1543                 update_heap_boundaries_for_block (block);
1544         } else {
1545                 /*
1546                  * Blocks without live objects are removed from the
1547                  * block list and freed.
1548                  */
1549                 SGEN_ASSERT (6, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
1550                 SGEN_ASSERT (6, *block_slot == BLOCK_TAG_CHECKING (tagged_block), "How did the block move?");
1551
1552                 binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
1553                 ms_free_block (block);
1554
1555                 SGEN_ATOMIC_ADD_P (num_major_sections, -1);
1556                 SGEN_ATOMIC_ADD_P (num_major_sections_freed_in_sweep, 1);
1557
1558                 tagged_block = NULL;
1559         }
1560
1561  done:
1562         /*
1563          * Once the block is written back without the checking bit other threads are
1564          * free to access it. Make sure the block state is visible before we write it
1565          * back.
1566          */
1567         mono_memory_write_barrier ();
1568         *block_slot = tagged_block;
1569         return !!tagged_block;
1570 }
1571
1572 static void
1573 sweep_blocks_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
1574 {
1575         volatile gpointer *slot;
1576         MSBlockInfo *bl;
1577
1578         SGEN_ARRAY_LIST_FOREACH_SLOT (&allocated_blocks, slot) {
1579                 bl = BLOCK_UNTAG (*slot);
1580                 if (bl)
1581                         sweep_block (bl);
1582         } SGEN_ARRAY_LIST_END_FOREACH_SLOT;
1583
1584         mono_memory_write_barrier ();
1585
1586         sweep_blocks_job = NULL;
1587 }
1588
1589 static void
1590 sweep_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
1591 {
1592         guint32 block_index;
1593         guint32 num_blocks = num_major_sections_before_sweep;
1594
1595         SGEN_ASSERT (0, sweep_in_progress (), "Sweep thread called with wrong state");
1596         SGEN_ASSERT (0, num_blocks <= allocated_blocks.next_slot, "How did we lose blocks?");
1597
1598         /*
1599          * We traverse the block array from high to low.  Nursery collections will have to
1600          * cooperate with the sweep thread to finish sweeping, and they will traverse from
1601          * low to high, to avoid constantly colliding on the same blocks.
1602          */
1603         for (block_index = allocated_blocks.next_slot; block_index-- > 0;) {
1604                 ensure_block_is_checked_for_sweeping (block_index, TRUE, NULL);
1605         }
1606
1607         while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING)) {
1608                 /*
1609                  * The main GC thread is currently iterating over the block array to help us
1610                  * finish the sweep.  We have already finished, but we don't want to mess up
1611                  * that iteration, so we just wait for it.
1612                  */
1613                 g_usleep (100);
1614         }
1615
1616         if (SGEN_MAX_ASSERT_LEVEL >= 6) {
1617                 for (block_index = num_blocks; block_index < allocated_blocks.next_slot; ++block_index) {
1618                         MSBlockInfo *block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, block_index));
1619                         SGEN_ASSERT (6, block && block->state == BLOCK_STATE_SWEPT, "How did a new block that needs sweeping get added while we were sweeping?");
1620                 }
1621         }
1622
1623         /*
1624          * Concurrently sweep all the blocks to reduce workload during minor
1625          * pauses where we need certain blocks to be swept. At the start of
1626          * the next major we need all blocks to be swept anyway.
1627          */
1628         if (concurrent_sweep && lazy_sweep) {
1629                 sweep_blocks_job = sgen_thread_pool_job_alloc ("sweep_blocks", sweep_blocks_job_func, sizeof (SgenThreadPoolJob));
1630                 sgen_thread_pool_job_enqueue (sweep_blocks_job);
1631         }
1632
1633         sweep_finish ();
1634
1635         sweep_job = NULL;
1636 }
1637
1638 static void
1639 sweep_finish (void)
1640 {
1641         mword used_slots_size = 0;
1642         int i;
1643
1644         for (i = 0; i < num_block_obj_sizes; ++i) {
1645                 float usage = (float)sweep_slots_used [i] / (float)sweep_slots_available [i];
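                     /* Only consider size classes with more than a handful of blocks; evacuating a single sparse block presumably isn't worth the copying. */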
1646                 if (sweep_num_blocks [i] > 5 && usage < evacuation_threshold) {
1647                         evacuate_block_obj_sizes [i] = TRUE;
1648                         /*
1649                         g_print ("slot size %d - %d of %d used\n",
1650                                         block_obj_sizes [i], slots_used [i], slots_available [i]);
1651                         */
1652                 } else {
1653                         evacuate_block_obj_sizes [i] = FALSE;
1654                 }
1655
1656                 used_slots_size += sweep_slots_used [i] * block_obj_sizes [i];
1657         }
1658
1659         sgen_memgov_major_post_sweep (used_slots_size);
1660
1661         set_sweep_state (SWEEP_STATE_SWEPT, SWEEP_STATE_COMPACTING);
1662         if (concurrent_sweep)
1663                 binary_protocol_concurrent_sweep_end (sgen_timestamp ());
1664 }
1665
1666 static void
1667 major_sweep (void)
1668 {
1669         set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_NEED_SWEEPING);
1670
1671         sweep_start ();
1672
1673         num_major_sections_before_sweep = num_major_sections;
1674         num_major_sections_freed_in_sweep = 0;
1675
1676         SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?");
1677         if (concurrent_sweep) {
1678                 sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob));
1679                 sgen_thread_pool_job_enqueue (sweep_job);
1680         } else {
1681                 sweep_job_func (NULL, NULL);
1682         }
1683 }
1684
1685 static gboolean
1686 major_have_swept (void)
1687 {
1688         return sweep_state == SWEEP_STATE_SWEPT;
1689 }
1690
1691 static int count_pinned_ref;
1692 static int count_pinned_nonref;
1693 static int count_nonpinned_ref;
1694 static int count_nonpinned_nonref;
1695
1696 static void
1697 count_nonpinned_callback (GCObject *obj, size_t size, void *data)
1698 {
1699         GCVTable vtable = SGEN_LOAD_VTABLE (obj);
1700
1701         if (SGEN_VTABLE_HAS_REFERENCES (vtable))
1702                 ++count_nonpinned_ref;
1703         else
1704                 ++count_nonpinned_nonref;
1705 }
1706
1707 static void
1708 count_pinned_callback (GCObject *obj, size_t size, void *data)
1709 {
1710         GCVTable vtable = SGEN_LOAD_VTABLE (obj);
1711
1712         if (SGEN_VTABLE_HAS_REFERENCES (vtable))
1713                 ++count_pinned_ref;
1714         else
1715                 ++count_pinned_nonref;
1716 }
1717
1718 static G_GNUC_UNUSED void
1719 count_ref_nonref_objs (void)
1720 {
1721         int total;
1722
1723         count_pinned_ref = 0;
1724         count_pinned_nonref = 0;
1725         count_nonpinned_ref = 0;
1726         count_nonpinned_nonref = 0;
1727
1728         major_iterate_objects (ITERATE_OBJECTS_SWEEP_NON_PINNED, count_nonpinned_callback, NULL);
1729         major_iterate_objects (ITERATE_OBJECTS_SWEEP_PINNED, count_pinned_callback, NULL);
1730
1731         total = count_pinned_nonref + count_nonpinned_nonref + count_pinned_ref + count_nonpinned_ref;
1732
1733         g_print ("ref: %d pinned %d non-pinned   non-ref: %d pinned %d non-pinned  --  %.1f\n",
1734                         count_pinned_ref, count_nonpinned_ref,
1735                         count_pinned_nonref, count_nonpinned_nonref,
1736                         (count_pinned_nonref + count_nonpinned_nonref) * 100.0 / total);
1737 }
1738
1739 static int
1740 ms_calculate_block_obj_sizes (double factor, int *arr)
1741 {
1742         double target_size;
1743         int num_sizes = 0;
1744         int last_size = 0;
1745
1746         /*
1747          * Have every possible slot size starting with the minimal
1748          * object size up to and including four times that size.  Then
1749          * proceed by increasing geometrically with the given factor.
1750          */
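             /* For example, assuming a 16-byte minimum object size and 8-byte allocation alignment, the series starts 16, 24, 32, ..., 64 and then grows by roughly the given factor per step until it reaches SGEN_MAX_SMALL_OBJ_SIZE. */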
1751
1752         for (int size = SGEN_CLIENT_MINIMUM_OBJECT_SIZE; size <= 4 * SGEN_CLIENT_MINIMUM_OBJECT_SIZE; size += SGEN_ALLOC_ALIGN) {
1753                 if (arr)
1754                         arr [num_sizes] = size;
1755                 ++num_sizes;
1756                 last_size = size;
1757         }
1758         target_size = (double)last_size;
1759
1760         do {
1761                 int target_count = (int)floor (MS_BLOCK_FREE / target_size);
1762                 int size = MIN ((MS_BLOCK_FREE / target_count) & ~(SGEN_ALLOC_ALIGN - 1), SGEN_MAX_SMALL_OBJ_SIZE);
1763
1764                 if (size != last_size) {
1765                         if (arr)
1766                                 arr [num_sizes] = size;
1767                         ++num_sizes;
1768                         last_size = size;
1769                 }
1770
1771                 target_size *= factor;
1772         } while (last_size < SGEN_MAX_SMALL_OBJ_SIZE);
1773
1774         return num_sizes;
1775 }
1776
1777 /* only valid during minor collections */
1778 static mword old_num_major_sections;
1779
1780 static void
1781 major_start_nursery_collection (void)
1782 {
1783 #ifdef MARKSWEEP_CONSISTENCY_CHECK
1784         consistency_check ();
1785 #endif
1786
1787         old_num_major_sections = num_major_sections;
1788 }
1789
1790 static void
1791 major_finish_nursery_collection (void)
1792 {
1793 #ifdef MARKSWEEP_CONSISTENCY_CHECK
1794         consistency_check ();
1795 #endif
1796 }
1797
1798 static int
1799 block_usage_comparer (const void *bl1, const void *bl2)
1800 {
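             /* Sort in descending order of used slots so that the fullest blocks come first. */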
1801         const gint16 nused1 = (*(MSBlockInfo**)bl1)->nused;
1802         const gint16 nused2 = (*(MSBlockInfo**)bl2)->nused;
1803
1804         return nused2 - nused1;
1805 }
1806
1807 static void
1808 sgen_evacuation_freelist_blocks (MSBlockInfo * volatile *block_list, int size_index)
1809 {
1810         MSBlockInfo **evacuated_blocks;
1811         size_t index = 0, count, num_blocks = 0, num_used = 0;
1812         MSBlockInfo *info;
1813         MSBlockInfo * volatile *prev;
1814
1815         for (info = *block_list; info != NULL; info = info->next_free) {
1816                 num_blocks++;
1817                 num_used += info->nused;
1818         }
1819
1820         /*
1821          * We have a set of blocks in the freelist which will be evacuated. Instead
1822          * of evacuating all of the blocks into new ones, we traverse the freelist
1823          * sorting it by the number of occupied slots, evacuating the objects from
1824          * blocks with fewer used slots into fuller blocks.
1825          *
1826          * The number of used slots is recorded at the end of the previous sweep.
1827          * Since slots are unlinked sequentially, and only from the block at the
1828          * head of the freelist, every other block on the freelist still has the
1829          * same number of used slots as at the end of the previous sweep.
1830          */
1831         evacuated_blocks = (MSBlockInfo**)sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_blocks, INTERNAL_MEM_TEMPORARY, TRUE);
1832
1833         for (info = *block_list; info != NULL; info = info->next_free) {
1834                 evacuated_blocks [index++] = info;
1835         }
1836
1837         SGEN_ASSERT (0, num_blocks == index, "Why did the freelist change?");
1838
1839         sgen_qsort (evacuated_blocks, num_blocks, sizeof (gpointer), block_usage_comparer);
1840
1841         /*
1842          * Form a new freelist with the fullest blocks. These blocks will also be
1843          * marked as to_space so we don't evacuate from them.
1844          */
1845         count = MS_BLOCK_FREE / block_obj_sizes [size_index];
1846         prev = block_list;
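             /* (num_used + count - 1) / count is ceil(num_used / count): the minimum number of blocks needed to hold all the surviving objects. */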
1847         for (index = 0; index < (num_used + count - 1) / count; index++) {
1848                 SGEN_ASSERT (0, index < num_blocks, "Why do we need more blocks for compaction than we already had?");
1849                 info = evacuated_blocks [index];
1850                 info->is_to_space = TRUE;
1851                 *prev = info;
1852                 prev = &info->next_free;
1853         }
1854         *prev = NULL;
1855
1856         sgen_free_internal_dynamic (evacuated_blocks, sizeof (MSBlockInfo*) * num_blocks, INTERNAL_MEM_TEMPORARY);
1857 }
1858
1859 static void
1860 major_start_major_collection (void)
1861 {
1862         MSBlockInfo *block;
1863         int i;
1864
1865         major_finish_sweep_checking ();
1866
1867         /*
1868          * Clear the free lists for block sizes where we do evacuation.  For those block
1869          * sizes we will have to allocate new blocks.
1870          */
1871         for (i = 0; i < num_block_obj_sizes; ++i) {
1872                 if (!evacuate_block_obj_sizes [i])
1873                         continue;
1874
1875                 binary_protocol_evacuating_blocks (block_obj_sizes [i]);
1876
1877                 sgen_evacuation_freelist_blocks (&free_block_lists [0][i], i);
1878                 sgen_evacuation_freelist_blocks (&free_block_lists [MS_BLOCK_FLAG_REFS][i], i);
1879         }
1880
1881         if (lazy_sweep && concurrent_sweep) {
1882                 /*
1883                  * sweep_blocks_job is created before sweep_finish, which we wait for above
1884                  * (major_finish_sweep_checking). After the sweep has ended, a NULL
1885                  * sweep_blocks_job means the job has already run.
1886                  */
1887                 SgenThreadPoolJob *job = sweep_blocks_job;
1888                 if (job)
1889                         sgen_thread_pool_job_wait (job);
1890         }
1891
1892         if (lazy_sweep && !concurrent_sweep)
1893                 binary_protocol_sweep_begin (GENERATION_OLD, TRUE);
1894         /* Sweep all unswept blocks and set them to MARKING */
1895         FOREACH_BLOCK_NO_LOCK (block) {
1896                 if (lazy_sweep && !concurrent_sweep)
1897                         sweep_block (block);
1898                 SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "All blocks must be swept when we're pinning.");
1899                 set_block_state (block, BLOCK_STATE_MARKING, BLOCK_STATE_SWEPT);
1900                 /*
1901                  * Swept blocks that have a null free_list are full. Evacuation is not
1902                  * effective on these blocks since we expect them to have high usage anyway,
1903                  * given that the survival rate for majors is relatively high.
1904                  */
1905                 if (evacuate_block_obj_sizes [block->obj_size_index] && !block->free_list)
1906                         block->is_to_space = TRUE;
1907         } END_FOREACH_BLOCK_NO_LOCK;
1908         if (lazy_sweep && !concurrent_sweep)
1909                 binary_protocol_sweep_end (GENERATION_OLD, TRUE);
1910
1911         set_sweep_state (SWEEP_STATE_NEED_SWEEPING, SWEEP_STATE_SWEPT);
1912 }
1913
1914 static void
1915 major_finish_major_collection (ScannedObjectCounts *counts)
1916 {
1917 #ifdef SGEN_HEAVY_BINARY_PROTOCOL
1918         if (binary_protocol_is_enabled ()) {
1919                 counts->num_scanned_objects = scanned_objects_list.next_slot;
1920
1921                 sgen_pointer_queue_sort_uniq (&scanned_objects_list);
1922                 counts->num_unique_scanned_objects = scanned_objects_list.next_slot;
1923
1924                 sgen_pointer_queue_clear (&scanned_objects_list);
1925         }
1926 #endif
1927 }
1928
1929 static int
1930 compare_pointers (const void *va, const void *vb)
{
1931         char *a = *(char**)va, *b = *(char**)vb;
1932         if (a < b)
1933                 return -1;
1934         if (a > b)
1935                 return 1;
1936         return 0;
1937 }
1938
1939 /*
1940  * This is called with sweep completed and the world stopped.
1941  */
1942 static void
1943 major_free_swept_blocks (size_t section_reserve)
1944 {
1945         SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
1946
1947 #ifdef TARGET_WIN32
1948         /*
1949          * sgen_free_os_memory () asserts in mono_vfree () because Windows doesn't like freeing the middle of
1950          * a VirtualAlloc ()-ed block.
1951          */
1952         return;
1953 #endif
1954
1955         {
1956                 int i, num_empty_blocks_orig, num_blocks, arr_length;
1957                 void *block;
1958                 void **empty_block_arr;
1959                 void **rebuild_next;
1960
1961                 if (num_empty_blocks <= section_reserve)
1962                         return;
1963                 SGEN_ASSERT (0, num_empty_blocks > 0, "section reserve can't be negative");
1964
1965                 num_empty_blocks_orig = num_empty_blocks;
1966                 empty_block_arr = (void**)sgen_alloc_internal_dynamic (sizeof (void*) * num_empty_blocks_orig,
1967                                 INTERNAL_MEM_MS_BLOCK_INFO_SORT, FALSE);
1968                 if (!empty_block_arr)
1969                         goto fallback;
1970
1971                 i = 0;
1972                 for (block = empty_blocks; block; block = *(void**)block)
1973                         empty_block_arr [i++] = block;
1974                 SGEN_ASSERT (0, i == num_empty_blocks, "empty block count wrong");
1975
1976                 sgen_qsort (empty_block_arr, num_empty_blocks, sizeof (void*), compare_pointers);
1977
1978                 /*
1979                  * We iterate over the free blocks, trying to find MS_BLOCK_ALLOC_NUM
1980                  * contiguous ones.  If we do, we free them.  If that's not enough to get to
1981                  * section_reserve, we halve the number of contiguous blocks we're looking
1982                  * for and have another go, until we are down to looking for pairs of
1983                  * blocks, at which point we give up and fall back to freeing individual blocks.
1984                  */
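                     /* E.g. with MS_BLOCK_ALLOC_NUM == 32 we look for runs of 32 contiguous blocks, then 16, 8, 4 and finally 2, freeing each run we find, stopping early once we are down to section_reserve. */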
1985                 arr_length = num_empty_blocks_orig;
1986                 num_blocks = MS_BLOCK_ALLOC_NUM;
1987                 while (num_empty_blocks > section_reserve && num_blocks > 1) {
1988                         int first = -1;
1989                         int dest = 0;
1990
1992                         for (i = 0; i < arr_length; ++i) {
1993                                 int d = dest;
1994                                 void *block = empty_block_arr [i];
1995                                 SGEN_ASSERT (6, block, "we're not shifting correctly");
1996                                 if (i != dest) {
1997                                         empty_block_arr [dest] = block;
1998                                         /*
1999                                          * This is not strictly necessary, but we're
2000                                          * cautious.
2001                                          */
2002                                         empty_block_arr [i] = NULL;
2003                                 }
2004                                 ++dest;
2005
2006                                 if (first < 0) {
2007                                         first = d;
2008                                         continue;
2009                                 }
2010
2011                                 SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong");
2012
2013                                 if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) {
2014                                         first = d;
2015                                         continue;
2016                                 }
2017
2018                                 if (d + 1 - first == num_blocks) {
2019                                         /*
2020                                          * We found num_blocks contiguous blocks.  Free them
2021                                          * and null their array entries.  As an optimization
2022                                          * we could, instead of nulling the entries, shift
2023                                          * the following entries over to the left, while
2024                                          * we're iterating.
2025                                          */
2026                                         int j;
2027                                         sgen_free_os_memory (empty_block_arr [first], MS_BLOCK_SIZE * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
2028                                         for (j = first; j <= d; ++j)
2029                                                 empty_block_arr [j] = NULL;
2030                                         dest = first;
2031                                         first = -1;
2032
2033                                         num_empty_blocks -= num_blocks;
2034
2035                                         stat_major_blocks_freed += num_blocks;
2036                                         if (num_blocks == MS_BLOCK_ALLOC_NUM)
2037                                                 stat_major_blocks_freed_ideal += num_blocks;
2038                                         else
2039                                                 stat_major_blocks_freed_less_ideal += num_blocks;
2040
2041                                 }
2042                         }
2043
2044                         SGEN_ASSERT (6, dest <= i && dest <= arr_length, "array length is off");
2045                         arr_length = dest;
2046                         SGEN_ASSERT (6, arr_length == num_empty_blocks, "array length is off");
2047
2048                         num_blocks >>= 1;
2049                 }
2050
2051                 /* rebuild empty_blocks free list */
2052                 rebuild_next = (void**)&empty_blocks;
2053                 for (i = 0; i < arr_length; ++i) {
2054                         void *block = empty_block_arr [i];
2055                         SGEN_ASSERT (6, block, "we're missing blocks");
2056                         *rebuild_next = block;
2057                         rebuild_next = (void**)block;
2058                 }
2059                 *rebuild_next = NULL;
2060
2061                 /* free array */
2062                 sgen_free_internal_dynamic (empty_block_arr, sizeof (void*) * num_empty_blocks_orig, INTERNAL_MEM_MS_BLOCK_INFO_SORT);
2063         }
2064
2065         SGEN_ASSERT (0, num_empty_blocks >= 0, "we freed more blocks than we had in the first place?");
2066
2067  fallback:
2068         /*
2069          * This is our threshold.  If there aren't more empty blocks than used ones, we won't
2070          * release non-contiguous blocks, for fear of fragmenting the address space.
2071          */
2072         if (num_empty_blocks <= num_major_sections)
2073                 return;
2074
2075         while (num_empty_blocks > section_reserve) {
2076                 void *next = *(void**)empty_blocks;
2077                 sgen_free_os_memory (empty_blocks, MS_BLOCK_SIZE, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
2078                 empty_blocks = next;
2079                 /*
2080                  * This need not be atomic because it runs
2081                  * single-threaded.
2082                  */
2083                 --num_empty_blocks;
2084
2085                 ++stat_major_blocks_freed;
2086                 ++stat_major_blocks_freed_individual;
2087         }
2088 }
2089
2090 static void
2091 major_pin_objects (SgenGrayQueue *queue)
2092 {
2093         MSBlockInfo *block;
2094
2095         FOREACH_BLOCK_NO_LOCK (block) {
2096                 size_t first_entry, last_entry;
2097                 SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning.");
2098                 sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
2099                                 &first_entry, &last_entry);
2100                 mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
2101         } END_FOREACH_BLOCK_NO_LOCK;
2102 }
2103
2104 static void
2105 major_init_to_space (void)
2106 {
2107 }
2108
2109 static void
2110 major_report_pinned_memory_usage (void)
2111 {
2112         g_assert_not_reached ();
2113 }
2114
2115 static gint64
2116 major_get_used_size (void)
2117 {
2118         gint64 size = 0;
2119         MSBlockInfo *block;
2120
2121         /*
2122          * We're holding the GC lock, but the sweep thread might be running.  Make sure it's
2123          * finished, then we can iterate over the block array.
2124          */
2125         major_finish_sweep_checking ();
2126
2127         FOREACH_BLOCK_NO_LOCK (block) {
2128                 int count = MS_BLOCK_FREE / block->obj_size;
2129                 void **iter;
2130                 size += count * block->obj_size;
2131                 for (iter = block->free_list; iter; iter = (void**)*iter)
2132                         size -= block->obj_size;
2133         } END_FOREACH_BLOCK_NO_LOCK;
2134
2135         return size;
2136 }
2137
2138 /* FIXME: return number of bytes, not of sections */
2139 static size_t
2140 get_num_major_sections (void)
2141 {
2142         return num_major_sections;
2143 }
2144
2145 /*
2146  * Returns the number of bytes in blocks that were present when the last sweep was
2147  * initiated, and were not freed during the sweep.  They are the basis for calculating the
2148  * allowance.
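      * For example, if 1000 blocks existed when the sweep started and 100 of
      * them were freed, this reports 900 * MS_BLOCK_SIZE bytes (roughly 14 MB
      * with the default 16K blocks).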
2149  */
2150 static size_t
2151 get_bytes_survived_last_sweep (void)
2152 {
2153         SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep");
2154         return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * MS_BLOCK_SIZE;
2155 }
2156
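     /*
      * Parses major-collector-specific options from the GC params string
      * (typically supplied via the MONO_GC_PARAMS environment variable),
      * e.g. "evacuation-threshold=66" or "no-concurrent-sweep".
      */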
2157 static gboolean
2158 major_handle_gc_param (const char *opt)
2159 {
2160         if (g_str_has_prefix (opt, "evacuation-threshold=")) {
2161                 const char *arg = strchr (opt, '=') + 1;
2162                 int percentage = atoi (arg);
2163                 if (percentage < 0 || percentage > 100) {
2164                         fprintf (stderr, "evacuation-threshold must be an integer in the range 0-100.\n");
2165                         exit (1);
2166                 }
2167                 evacuation_threshold = (float)percentage / 100.0f;
2168                 return TRUE;
2169         } else if (!strcmp (opt, "lazy-sweep")) {
2170                 lazy_sweep = TRUE;
2171                 return TRUE;
2172         } else if (!strcmp (opt, "no-lazy-sweep")) {
2173                 lazy_sweep = FALSE;
2174                 return TRUE;
2175         } else if (!strcmp (opt, "concurrent-sweep")) {
2176                 concurrent_sweep = TRUE;
2177                 return TRUE;
2178         } else if (!strcmp (opt, "no-concurrent-sweep")) {
2179                 concurrent_sweep = FALSE;
2180                 return TRUE;
2181         }
2182
2183         return FALSE;
2184 }
2185
2186 static void
2187 major_print_gc_param_usage (void)
2188 {
2189         fprintf (stderr,
2190                         ""
2191                         "  evacuation-threshold=P (where P is a percentage, an integer in 0-100)\n"
2192                         "  (no-)lazy-sweep\n"
2193                         "  (no-)concurrent-sweep\n"
2194                         );
2195 }
2196
2197 /*
2198  * This callback is used to clear cards, move cards to the shadow table and do counting.
2199  */
2200 static void
2201 major_iterate_block_ranges (sgen_cardtable_block_callback callback)
2202 {
2203         MSBlockInfo *block;
2204         gboolean has_references;
2205
2206         FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
2207                 if (has_references)
2208                         callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
2209         } END_FOREACH_BLOCK_NO_LOCK;
2210 }
2211
2212 static void
2213 major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
2214 {
2215         MSBlockInfo *block;
2216         gboolean has_references;
2217
2218         major_finish_sweep_checking ();
2219         FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
2220                 if (has_references)
2221                         callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
2222         } END_FOREACH_BLOCK_NO_LOCK;
2223 }
2224
2225 #ifdef HEAVY_STATISTICS
2226 extern guint64 marked_cards;
2227 extern guint64 scanned_cards;
2228 extern guint64 scanned_objects;
2229 extern guint64 remarked_cards;
2230 #endif
2231
2232 #define CARD_WORDS_PER_BLOCK (CARDS_PER_BLOCK / SIZEOF_VOID_P)
2233 /*
2234  * MS blocks are 16K aligned.
2235  * Cardtables are 4K aligned, at least.
2236  * This means that the cardtable of a given block is 32 bytes aligned.
2237  */
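     /* With the default 16K blocks and 512-byte cards, CARDS_PER_BLOCK is 32, so a block's card data spans 32 bytes, i.e. CARD_WORDS_PER_BLOCK machine words. */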
2238 static guint8*
2239 initial_skip_card (guint8 *card_data)
2240 {
2241         mword *cards = (mword*)card_data;
2242         mword card;
2243         int i;
2244         for (i = 0; i < CARD_WORDS_PER_BLOCK; ++i) {
2245                 card = cards [i];
2246                 if (card)
2247                         break;
2248         }
2249
2250         if (i == CARD_WORDS_PER_BLOCK)
2251                 return card_data + CARDS_PER_BLOCK;
2252
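             /* __builtin_ffs returns the 1-based index of the lowest set bit; making it 0-based and dividing by 8 converts the bit index into the byte offset of the first dirty card within the word. */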
2253 #if defined(__i386__) && defined(__GNUC__)
2254         return card_data + i * 4 +  (__builtin_ffs (card) - 1) / 8;
2255 #elif defined(__x86_64__) && defined(__GNUC__)
2256         return card_data + i * 8 +  (__builtin_ffsll (card) - 1) / 8;
2257 #elif defined(__s390x__) && defined(__GNUC__)
2258         return card_data + i * 8 +  (__builtin_ffsll (GUINT64_TO_LE(card)) - 1) / 8;
2259 #else
2260         for (i = i * SIZEOF_VOID_P; i < CARDS_PER_BLOCK; ++i) {
2261                 if (card_data [i])
2262                         return &card_data [i];
2263         }
2264         return card_data;
2265 #endif
2266 }
2267
2268 #define MS_BLOCK_OBJ_INDEX_FAST(o,b,os) (((char*)(o) - ((b) + MS_BLOCK_SKIP)) / (os))
2269 #define MS_BLOCK_OBJ_FAST(b,os,i)                       ((b) + MS_BLOCK_SKIP + (os) * (i))
2270 #define MS_OBJ_ALLOCED_FAST(o,b)                (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + MS_BLOCK_SIZE))
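     /* MS_OBJ_ALLOCED_FAST: a slot is allocated iff its first word is a non-NULL pointer pointing outside the block. An allocated object starts with its vtable pointer, which never points into the block, while a free slot holds either NULL or a free-list link into the same block. */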
2271
2272 static void
2273 scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, ScanCopyContext ctx)
2274 {
2275         SgenGrayQueue *queue = ctx.queue;
2276         ScanObjectFunc scan_func = ctx.ops->scan_object;
2277 #ifndef SGEN_HAVE_OVERLAPPING_CARDS
2278         guint8 cards_copy [CARDS_PER_BLOCK];
2279 #endif
2280         guint8 cards_preclean [CARDS_PER_BLOCK];
2281         gboolean small_objects;
2282         int block_obj_size;
2283         char *block_start;
2284         guint8 *card_data, *card_base;
2285         guint8 *card_data_end;
2286         char *scan_front = NULL;
2287
2288         /* The concurrent mark doesn't enter evacuating blocks */
2289         if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN && major_block_is_evacuating (block))
2290                 return;
2291
2292         block_obj_size = block->obj_size;
2293         small_objects = block_obj_size < CARD_SIZE_IN_BYTES;
2294
2295         block_start = MS_BLOCK_FOR_BLOCK_INFO (block);
2296
2297         /*
2298          * This is safe in the face of card aliasing for the following reason:
2299          *
2300          * Major blocks are 16k aligned, i.e. 32-card aligned.
2301          * Card aliasing happens in powers of two, so as long as major blocks are aligned to their
2302          * sizes, they won't overflow the cardtable overlap modulus.
2303          */
2304         if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
2305                 card_data = card_base = block->cardtable_mod_union;
2306                 /*
2307                  * This happens when the nursery collection that precedes finishing
2308                  * the concurrent collection allocates new major blocks.
2309                  */
2310                 if (!card_data)
2311                         return;
2312
2313                 if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN) {
2314                         sgen_card_table_preclean_mod_union (card_data, cards_preclean, CARDS_PER_BLOCK);
2315                         card_data = card_base = cards_preclean;
2316                 }
2317         } else {
2318 #ifdef SGEN_HAVE_OVERLAPPING_CARDS
2319                 card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
2320 #else
2321                 if (!sgen_card_table_get_card_data (cards_copy, (mword)block_start, CARDS_PER_BLOCK))
2322                         return;
2323                 card_data = card_base = cards_copy;
2324 #endif
2325         }
2326         card_data_end = card_data + CARDS_PER_BLOCK;
2327
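             /* Skip the cards that cover only the block header. */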
2328         card_data += MS_BLOCK_SKIP >> CARD_BITS;
2329
2330         card_data = initial_skip_card (card_data);
2331         while (card_data < card_data_end) {
2332                 size_t card_index, first_object_index;
2333                 char *start;
2334                 char *end;
2335                 char *first_obj, *obj;
2336
2337                 HEAVY_STAT (++scanned_cards);
2338
2339                 if (!*card_data) {
2340                         ++card_data;
2341                         continue;
2342                 }
2343
2344                 card_index = card_data - card_base;
2345                 start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
2346                 end = start + CARD_SIZE_IN_BYTES;
2347
2348                 if (!block_is_swept_or_marking (block))
2349                         sweep_block (block);
2350
2351                 HEAVY_STAT (++marked_cards);
2352
2353                 if (small_objects)
2354                         sgen_card_table_prepare_card_for_scanning (card_data);
2355
2356                 /*
2357                  * If the card we're looking at starts at or in the block header, we
2358                  * must start at the first object in the block, without calculating
2359                  * the index of the object we're hypothetically starting at, because
2360                  * it would be negative.
2361                  */
2362                 if (card_index <= (MS_BLOCK_SKIP >> CARD_BITS))
2363                         first_object_index = 0;
2364                 else
2365                         first_object_index = MS_BLOCK_OBJ_INDEX_FAST (start, block_start, block_obj_size);
2366
2367                 obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, first_object_index);
2368
2369                 binary_protocol_card_scan (first_obj, end - first_obj);
2370
2371                 while (obj < end) {
2372                         if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
2373                                 goto next_object;
2374
2375                         if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
2376                                 /* FIXME: do this more efficiently */
2377                                 int w, b;
2378                                 MS_CALC_MARK_BIT (w, b, obj);
2379                                 if (!MS_MARK_BIT (block, w, b))
2380                                         goto next_object;
2381                         }
2382
2383                         GCObject *object = (GCObject*)obj;
2384
2385                         if (small_objects) {
2386                                 HEAVY_STAT (++scanned_objects);
2387                                 scan_func (object, sgen_obj_get_descriptor (object), queue);
2388                         } else {
2389                                 size_t offset = sgen_card_table_get_card_offset (obj, block_start);
2390                                 sgen_cardtable_scan_object (object, block_obj_size, card_base + offset, ctx);
2391                         }
2392                 next_object:
2393                         obj += block_obj_size;
2394                         g_assert (scan_front <= obj);
2395                         scan_front = obj;
2396                 }
2397
2398                 HEAVY_STAT (if (*card_data) ++remarked_cards);
2399
2400                 if (small_objects)
2401                         ++card_data;
2402                 else
2403                         card_data = card_base + sgen_card_table_get_card_offset (obj, block_start);
2404         }
2405 }
2406
2407 static void
2408 major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
2409 {
2410         MSBlockInfo *block;
2411         gboolean has_references, was_sweeping, skip_scan;
2412
2413         if (!concurrent_mark)
2414                 g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);
2415
2416         if (scan_type != CARDTABLE_SCAN_GLOBAL)
2417                 SGEN_ASSERT (0, !sweep_in_progress (), "Sweep should be finished when we scan mod union card table");
2418         was_sweeping = sweep_in_progress ();
2419
2420         binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
2421         FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
2422 #ifdef PREFETCH_CARDS
2423                 int prefetch_index = __index + 6;
2424                 if (prefetch_index < allocated_blocks.next_slot) {
2425                         MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
2426                         PREFETCH_READ (prefetch_block);
2427                         if (scan_type == CARDTABLE_SCAN_GLOBAL) {
2428                                 guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
2429                                 PREFETCH_WRITE (prefetch_cards);
2430                                 PREFETCH_WRITE (prefetch_cards + 32);
2431                         }
2432                 }
2433 #endif
2434
2435                 if (!has_references)
2436                         continue;
2437                 skip_scan = FALSE;
2438
2439                 if (scan_type == CARDTABLE_SCAN_GLOBAL) {
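                             /* Quick pre-check: read the block's cards a word at a time; if none are dirty we can skip the block entirely. */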
2440                         gpointer *card_start = (gpointer*) sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (block));
2441                         gboolean has_dirty_cards = FALSE;
2442                         int i;
2443                         for (i = 0; i < CARDS_PER_BLOCK / sizeof(gpointer); i++) {
2444                                 if (card_start [i]) {
2445                                         has_dirty_cards = TRUE;
2446                                         break;
2447                                 }
2448                         }
2449                         if (!has_dirty_cards) {
2450                                 skip_scan = TRUE;
2451                         } else {
2452                                 /*
2453                                  * After the start of a concurrent collection, blocks change state
2454                                  * to marking, and we must not sweep them in that case. We can't race with
2455                                  * sweep start since we are in a nursery collection. Also avoid CAS-ing the slot when no sweep is in progress: rechecking it is enough.
2456                                  */
2457                                 if (sweep_in_progress ()) {
2458                                         skip_scan = !ensure_block_is_checked_for_sweeping (__index, TRUE, NULL);
2459                                 } else if (was_sweeping) {
2460                                         /* Recheck in case sweep finished after dereferencing the slot */
2461                                         skip_scan = *sgen_array_list_get_slot (&allocated_blocks, __index) == 0;
2462                                 }
2463                         }
2464                 }
2465                 if (!skip_scan)
2466                         scan_card_table_for_block (block, scan_type, ctx);
2467         } END_FOREACH_BLOCK_NO_LOCK;
2468         binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
2469 }
2470
2471 static void
2472 major_count_cards (long long *num_total_cards, long long *num_marked_cards)
2473 {
2474         MSBlockInfo *block;
2475         gboolean has_references;
2476         long long total_cards = 0;
2477         long long marked_cards = 0;
2478
2479         if (sweep_in_progress ()) {
2480                 *num_total_cards = -1;
2481                 *num_marked_cards = -1;
2482                 return;
2483         }
2484
2485         FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
2486                 guint8 *cards = sgen_card_table_get_card_scan_address ((mword) MS_BLOCK_FOR_BLOCK_INFO (block));
2487                 int i;
2488
2489                 if (!has_references)
2490                         continue;
2491
2492                 total_cards += CARDS_PER_BLOCK;
2493                 for (i = 0; i < CARDS_PER_BLOCK; ++i) {
2494                         if (cards [i])
2495                                 ++marked_cards;
2496                 }
2497         } END_FOREACH_BLOCK_NO_LOCK;
2498
2499         *num_total_cards = total_cards;
2500         *num_marked_cards = marked_cards;
2501 }
2502
2503 static void
2504 update_cardtable_mod_union (void)
2505 {
2506         MSBlockInfo *block;
2507
2508         FOREACH_BLOCK_NO_LOCK (block) {
2509                 gpointer *card_start = (gpointer*) sgen_card_table_get_card_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (block));
2510                 gboolean has_dirty_cards = FALSE;
2511                 int i;
2512                 for (i = 0; i < CARDS_PER_BLOCK / sizeof(gpointer); i++) {
2513                         if (card_start [i]) {
2514                                 has_dirty_cards = TRUE;
2515                                 break;
2516                         }
2517                 }
2518                 if (has_dirty_cards) {
2519                         size_t num_cards;
2520                         guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE);
2521                         sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards);
2522                         SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
2523                 }
2524         } END_FOREACH_BLOCK_NO_LOCK;
2525 }
2526
2527 #undef pthread_create
2528
2529 static void
2530 post_param_init (SgenMajorCollector *collector)
2531 {
2532         collector->sweeps_lazily = lazy_sweep;
2533         collector->needs_thread_pool = concurrent_mark || concurrent_sweep;
2534 }
2535
2536 static void
2537 sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurrent, gboolean is_parallel)
2538 {
2539         int i;
2540
2541         sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, sizeof (MSBlockInfo));
2542
2543         num_block_obj_sizes = ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, NULL);
2544         block_obj_sizes = (int *)sgen_alloc_internal_dynamic (sizeof (int) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2545         ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, block_obj_sizes);
2546
2547         evacuate_block_obj_sizes = (gboolean *)sgen_alloc_internal_dynamic (sizeof (gboolean) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2548         for (i = 0; i < num_block_obj_sizes; ++i)
2549                 evacuate_block_obj_sizes [i] = FALSE;
2550
2551         sweep_slots_available = (size_t *)sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2552         sweep_slots_used = (size_t *)sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2553         sweep_num_blocks = (size_t *)sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2554
2555         /*
2556         {
2557                 int i;
2558                 g_print ("block object sizes:\n");
2559                 for (i = 0; i < num_block_obj_sizes; ++i)
2560                         g_print ("%d\n", block_obj_sizes [i]);
2561         }
2562         */
2563
2564         for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i)
2565                 free_block_lists [i] = (MSBlockInfo *volatile *)sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
2566
2567         for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES; ++i)
2568                 fast_block_obj_size_indexes [i] = ms_find_block_obj_size_index (i * 8);
2569         for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i)
2570                 g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i));
2571
2572         mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced);
2573         mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed);
2574         mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept);
2575         mono_counters_register ("# major blocks freed ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_ideal);
2576         mono_counters_register ("# major blocks freed less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_less_ideal);
2577         mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual);
2578         mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal);
2579
2580         collector->section_size = MAJOR_SECTION_SIZE;
2581
2582         concurrent_mark = is_concurrent;
2583         collector->is_concurrent = is_concurrent;
2584         collector->is_parallel = is_parallel;
2585         collector->needs_thread_pool = is_concurrent || concurrent_sweep;
2586         collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
2587         collector->supports_cardtable = TRUE;
2588
2589         collector->alloc_heap = major_alloc_heap;
2590         collector->is_object_live = major_is_object_live;
2591         collector->alloc_small_pinned_obj = major_alloc_small_pinned_obj;
2592         collector->alloc_degraded = major_alloc_degraded;
2593
2594         collector->alloc_object = major_alloc_object;
2595         collector->free_pinned_object = free_pinned_object;
2596         collector->iterate_objects = major_iterate_objects;
2597         collector->free_non_pinned_object = major_free_non_pinned_object;
2598         collector->pin_objects = major_pin_objects;
2599         collector->pin_major_object = pin_major_object;
2600         collector->scan_card_table = major_scan_card_table;
2601         collector->iterate_live_block_ranges = major_iterate_live_block_ranges;
2602         collector->iterate_block_ranges = major_iterate_block_ranges;
2603         if (is_concurrent) {
2604                 collector->update_cardtable_mod_union = update_cardtable_mod_union;
2605                 collector->get_cardtable_mod_union_for_reference = major_get_cardtable_mod_union_for_reference;
2606         }
2607         collector->init_to_space = major_init_to_space;
2608         collector->sweep = major_sweep;
2609         collector->have_swept = major_have_swept;
2610         collector->finish_sweeping = major_finish_sweep_checking;
2611         collector->free_swept_blocks = major_free_swept_blocks;
2612         collector->check_scan_starts = major_check_scan_starts;
2613         collector->dump_heap = major_dump_heap;
2614         collector->get_used_size = major_get_used_size;
2615         collector->start_nursery_collection = major_start_nursery_collection;
2616         collector->finish_nursery_collection = major_finish_nursery_collection;
2617         collector->start_major_collection = major_start_major_collection;
2618         collector->finish_major_collection = major_finish_major_collection;
2619         collector->ptr_is_in_non_pinned_space = major_ptr_is_in_non_pinned_space;
2620         collector->ptr_is_from_pinned_alloc = ptr_is_from_pinned_alloc;
2621         collector->report_pinned_memory_usage = major_report_pinned_memory_usage;
2622         collector->get_num_major_sections = get_num_major_sections;
2623         collector->get_bytes_survived_last_sweep = get_bytes_survived_last_sweep;
2624         collector->handle_gc_param = major_handle_gc_param;
2625         collector->print_gc_param_usage = major_print_gc_param_usage;
2626         collector->post_param_init = post_param_init;
2627         collector->is_valid_object = major_is_valid_object;
2628         collector->describe_pointer = major_describe_pointer;
2629         collector->count_cards = major_count_cards;
2630
2631         collector->major_ops_serial.copy_or_mark_object = major_copy_or_mark_object_canonical;
2632         collector->major_ops_serial.scan_object = major_scan_object_with_evacuation;
2633         collector->major_ops_serial.drain_gray_stack = drain_gray_stack;
2634         if (is_concurrent) {
2635                 collector->major_ops_concurrent_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical;
2636                 collector->major_ops_concurrent_start.scan_object = major_scan_object_concurrent_with_evacuation;
2637                 collector->major_ops_concurrent_start.scan_vtype = major_scan_vtype_concurrent_with_evacuation;
2638                 collector->major_ops_concurrent_start.scan_ptr_field = major_scan_ptr_field_concurrent_with_evacuation;
2639                 collector->major_ops_concurrent_start.drain_gray_stack = drain_gray_stack_concurrent;
2640
2641                 collector->major_ops_concurrent_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_finish_canonical;
2642                 collector->major_ops_concurrent_finish.scan_object = major_scan_object_with_evacuation;
2643                 collector->major_ops_concurrent_finish.scan_vtype = major_scan_vtype_with_evacuation;
2644                 collector->major_ops_concurrent_finish.scan_ptr_field = major_scan_ptr_field_with_evacuation;
2645                 collector->major_ops_concurrent_finish.drain_gray_stack = drain_gray_stack;
2646
2647                 if (is_parallel) {
2648                         /* FIXME use parallel obj ops */
2649                         collector->major_ops_conc_par_start = collector->major_ops_concurrent_start;
2650                         collector->major_ops_conc_par_finish = collector->major_ops_concurrent_finish;
2651                 }
2652         }
2653
2654 #ifdef HEAVY_STATISTICS
2655         mono_counters_register ("Optimized copy", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy);
2656         mono_counters_register ("Optimized copy nursery", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery);
2657         mono_counters_register ("Optimized copy nursery forwarded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_forwarded);
2658         mono_counters_register ("Optimized copy nursery pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_pinned);
2659         mono_counters_register ("Optimized copy major", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major);
2660         mono_counters_register ("Optimized copy major small fast", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_fast);
2661         mono_counters_register ("Optimized copy major small slow", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_slow);
2662         mono_counters_register ("Optimized copy major small evacuate", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_evacuate);
2663         mono_counters_register ("Optimized copy major large", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_large);
2664         mono_counters_register ("Optimized major scan", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan);
2665         mono_counters_register ("Optimized major scan no refs", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan_no_refs);
2666
2667         mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_loops);
2668         mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fills);
2669         mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fill_failures);
2670 #endif
2671
2672 #ifdef SGEN_HEAVY_BINARY_PROTOCOL
2673         mono_os_mutex_init (&scanned_objects_list_lock);
2674 #endif
2675
2676         SGEN_ASSERT (0, SGEN_MAX_SMALL_OBJ_SIZE <= MS_BLOCK_FREE / 2, "MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2");
2677
2678         /* cardtable requires major pages to be 8 cards aligned */
2679         g_assert ((MS_BLOCK_SIZE % (8 * CARD_SIZE_IN_BYTES)) == 0);
2680 }
2681
2682 void
2683 sgen_marksweep_init (SgenMajorCollector *collector)
2684 {
2685         sgen_marksweep_init_internal (collector, FALSE, FALSE);
2686 }
2687
2688 void
2689 sgen_marksweep_conc_init (SgenMajorCollector *collector)
2690 {
2691         sgen_marksweep_init_internal (collector, TRUE, FALSE);
2692 }
2693
2694 void
2695 sgen_marksweep_conc_par_init (SgenMajorCollector *collector)
2696 {
2697         sgen_marksweep_init_internal (collector, TRUE, TRUE);
2698 }
2699
2700 #endif