Merge pull request #1624 from esdrubal/getprocesstimes
[mono.git] / mono / metadata / sgen-marksweep.c
index bfd8e0d8ba62a62691f8659fd2b15833d51cbe15..db91b6773c85c5f9f898137fdb4b7d9b9b23d6cd 100644 (file)
 #include "metadata/sgen-pinning.h"
 #include "metadata/sgen-workers.h"
 
-#define SGEN_HAVE_CONCURRENT_MARK
-
-#define MS_BLOCK_SIZE  (16*1024)
-#define MS_BLOCK_SIZE_SHIFT    14
+#if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
+#define MS_BLOCK_SIZE  ARCH_MIN_MS_BLOCK_SIZE
+#define MS_BLOCK_SIZE_SHIFT    ARCH_MIN_MS_BLOCK_SIZE_SHIFT
+#else
+#define MS_BLOCK_SIZE_SHIFT     14      /* INT FASTENABLE */
+#define MS_BLOCK_SIZE           (1 << MS_BLOCK_SIZE_SHIFT)
+#endif
 #define MAJOR_SECTION_SIZE     MS_BLOCK_SIZE
 #define CARDS_PER_BLOCK (MS_BLOCK_SIZE / CARD_SIZE_IN_BYTES)
 
  */
 #define MS_BLOCK_ALLOC_NUM     32
 
-#define BLOCK_INFO_IN_HEADER   1       /* BOOL FASTENABLE */
-#if !BLOCK_INFO_IN_HEADER
-#undef BLOCK_INFO_IN_HEADER
-#endif
-
 /*
  * Number of bytes before the first object in a block.  At the start
  * of a block is the MSBlockHeader, then optional padding, then come
 
 typedef struct _MSBlockInfo MSBlockInfo;
 struct _MSBlockInfo {
-       int obj_size;
-       int obj_size_index;
+       guint16 obj_size;
+       /*
+        * FIXME: Do we even need this? It's only used during sweep and might be worth
+        * recalculating to save the space.
+        */
+       guint16 obj_size_index;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
        unsigned int has_pinned : 1;    /* means cannot evacuate */
        unsigned int is_to_space : 1;
        unsigned int swept : 1;
-#ifndef BLOCK_INFO_IN_HEADER
-       char *block;
-#endif
-       void **free_list;
-       MSBlockInfo *next_free;
-       size_t pin_queue_first_entry;
-       size_t pin_queue_last_entry;
-#ifdef SGEN_HAVE_CONCURRENT_MARK
+       void ** volatile free_list;
+       MSBlockInfo * volatile next_free;
        guint8 *cardtable_mod_union;
-#endif
        mword mark_words [MS_NUM_MARK_WORDS];
 };
 
-#ifdef BLOCK_INFO_IN_HEADER
 #define MS_BLOCK_FOR_BLOCK_INFO(b)     ((char*)(b))
-#else
-#define MS_BLOCK_FOR_BLOCK_INFO(b)     ((b)->block)
-#endif
 
 #define MS_BLOCK_OBJ(b,i)              (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i))
 #define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size)            (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i))
 #define MS_BLOCK_DATA_FOR_OBJ(o)       ((char*)((mword)(o) & ~(mword)(MS_BLOCK_SIZE - 1)))
 
 typedef struct {
-#ifdef BLOCK_INFO_IN_HEADER
        MSBlockInfo info;
-#else
-       MSBlockInfo *info;
-#endif
 } MSBlockHeader;
 
-#ifdef BLOCK_INFO_IN_HEADER
 #define MS_BLOCK_FOR_OBJ(o)            (&((MSBlockHeader*)MS_BLOCK_DATA_FOR_OBJ ((o)))->info)
-#else
-#define MS_BLOCK_FOR_OBJ(o)            (((MSBlockHeader*)MS_BLOCK_DATA_FOR_OBJ ((o)))->info)
-#endif
 
 /* object index will always be small */
 #define MS_BLOCK_OBJ_INDEX(o,b)        ((int)(((char*)(o) - (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP)) / (b)->obj_size))
@@ -139,7 +122,7 @@ typedef struct {
 
 #define MS_OBJ_ALLOCED(o,b)    (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE))
 
-#define MS_BLOCK_OBJ_SIZE_FACTOR       (sqrt (2.0))
+#define MS_BLOCK_OBJ_SIZE_FACTOR       (pow (2.0, 1.0 / 3))
 
 /*
  * This way we can lookup block object size indexes for sizes up to
@@ -158,17 +141,13 @@ static int fast_block_obj_size_indexes [MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES];
 
 static gboolean *evacuate_block_obj_sizes;
 static float evacuation_threshold = 0.666f;
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 static float concurrent_evacuation_threshold = 0.666f;
 static gboolean want_evacuation = FALSE;
-#endif
 
 static gboolean lazy_sweep = TRUE;
-static gboolean have_swept;
+static gboolean have_swept = TRUE;
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 static gboolean concurrent_mark;
-#endif
 
 #define BLOCK_IS_TAGGED_HAS_REFERENCES(bl)     SGEN_POINTER_IS_TAGGED_1 ((bl))
 #define BLOCK_TAG_HAS_REFERENCES(bl)           SGEN_POINTER_TAG_1 ((bl))
@@ -192,20 +171,20 @@ static size_t num_major_sections = 0;
 /* one free block list for each block object size */
 static MSBlockInfo **free_block_lists [MS_BLOCK_TYPE_MAX];
 
-static long long stat_major_blocks_alloced = 0;
-static long long stat_major_blocks_freed = 0;
-static long long stat_major_blocks_lazy_swept = 0;
-static long long stat_major_objects_evacuated = 0;
+static guint64 stat_major_blocks_alloced = 0;
+static guint64 stat_major_blocks_freed = 0;
+static guint64 stat_major_blocks_lazy_swept = 0;
+static guint64 stat_major_objects_evacuated = 0;
 
 #if SIZEOF_VOID_P != 8
-static long long stat_major_blocks_freed_ideal = 0;
-static long long stat_major_blocks_freed_less_ideal = 0;
-static long long stat_major_blocks_freed_individual = 0;
-static long long stat_major_blocks_alloced_less_ideal = 0;
+static guint64 stat_major_blocks_freed_ideal = 0;
+static guint64 stat_major_blocks_freed_less_ideal = 0;
+static guint64 stat_major_blocks_freed_individual = 0;
+static guint64 stat_major_blocks_alloced_less_ideal = 0;
 #endif
 
 #ifdef SGEN_COUNT_NUMBER_OF_MAJOR_OBJECTS_MARKED
-static long long num_major_objects_marked = 0;
+static guint64 num_major_objects_marked = 0;
 #define INC_NUM_MAJOR_OBJECTS_MARKED() (++num_major_objects_marked)
 #else
 #define INC_NUM_MAJOR_OBJECTS_MARKED()
@@ -330,6 +309,10 @@ ms_get_empty_block (void)
        return block;
 }
 
+/*
+ * This doesn't actually free a block immediately, but enqueues it into the `empty_blocks`
+ * list, where it will either be freed later on, or reused in nursery collections.
+ */
 static void
 ms_free_block (void *block)
 {
@@ -438,17 +421,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
        if (!sgen_memgov_try_alloc_space (MS_BLOCK_SIZE, SPACE_MAJOR))
                return FALSE;
 
-#ifdef BLOCK_INFO_IN_HEADER
        info = (MSBlockInfo*)ms_get_empty_block ();
-#else
-       {
-               MSBlockHeader *header;
-               info = sgen_alloc_internal (INTERNAL_MEM_MS_BLOCK_INFO);
-               info->block = ms_get_empty_block ();
-               header = (MSBlockHeader*)info->block;
-               header->info = info;
-       }
-#endif
 
        SGEN_ASSERT (9, count >= 2, "block with %d objects, it must hold at least 2", count);
 
@@ -465,9 +438,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD);
        info->swept = 1;
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        info->cardtable_mod_union = NULL;
-#endif
 
        update_heap_boundaries_for_block (info);
 
@@ -852,7 +823,6 @@ major_dump_heap (FILE *heap_dump_file)
 
 #define LOAD_VTABLE    SGEN_LOAD_VTABLE
 
-#ifdef SGEN_MARK_ON_ENQUEUE
 #define MS_MARK_OBJECT_AND_ENQUEUE_CHECKED(obj,desc,block,queue) do {  \
                int __word, __bit;                                      \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
@@ -876,60 +846,14 @@ major_dump_heap (FILE *heap_dump_file)
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)
-#define MS_PAR_MARK_OBJECT_AND_ENQUEUE(obj,desc,block,queue) do {      \
-               int __word, __bit;                                      \
-               gboolean __was_marked;                                  \
-               SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj); \
-               MS_CALC_MARK_BIT (__word, __bit, (obj));                \
-               MS_PAR_SET_MARK_BIT (__was_marked, (block), __word, __bit); \
-               if (!__was_marked) {                                    \
-                       if (sgen_gc_descr_has_references (desc))                        \
-                               GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
-                       binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
-                       INC_NUM_MAJOR_OBJECTS_MARKED ();                \
-               }                                                       \
-       } while (0)
-#else
-#define MS_MARK_OBJECT_AND_ENQUEUE_CHECKED(obj,desc,block,queue) do {  \
-               int __word, __bit;                                      \
-               SGEN_ASSERT (0, sgen_get_current_collection_generation () == GENERATION_OLD, "Can't majorly enqueue objects when doing minor collection"); \
-               MS_CALC_MARK_BIT (__word, __bit, (obj));                \
-               if (MS_OBJ_ALLOCED ((obj), (block))) { \
-                       if (sgen_gc_descr_has_references (desc)) {                                              \
-                               GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
-                       } else {                                        \
-                               MS_SET_MARK_BIT ((block), __word, __bit); \
-                               binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
-                       }                                               \
-                       INC_NUM_MAJOR_OBJECTS_MARKED ();                \
-               }                                                       \
-       } while (0)
-#define MS_MARK_OBJECT_AND_ENQUEUE(obj,desc,block,queue) do {          \
-               int __word, __bit;                                      \
-               SGEN_ASSERT (0, sgen_get_current_collection_generation () == GENERATION_OLD, "Can't majorly enqueue objects when doing minor collection"); \
-               MS_CALC_MARK_BIT (__word, __bit, (obj));                \
-               SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj);       \
-               {               \
-                       if (sgen_gc_descr_has_references (desc)) {                      \
-                               GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
-                       } else {                                        \
-                               MS_SET_MARK_BIT ((block), __word, __bit); \
-                               binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
-                       }                                               \
-                       INC_NUM_MAJOR_OBJECTS_MARKED ();                \
-               }                                                       \
-       } while (0)
-#endif
 
 static void
 pin_major_object (char *obj, SgenGrayQueue *queue)
 {
        MSBlockInfo *block;
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        if (concurrent_mark)
                g_assert_not_reached ();
-#endif
 
        block = MS_BLOCK_FOR_OBJ (obj);
        block->has_pinned = TRUE;
@@ -938,7 +862,6 @@ pin_major_object (char *obj, SgenGrayQueue *queue)
 
 #include "sgen-major-copy-object.h"
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 static void
 major_copy_or_mark_object_with_evacuation_concurrent (void **ptr, void *obj, SgenGrayQueue *queue)
 {
@@ -973,7 +896,6 @@ major_copy_or_mark_object_with_evacuation_concurrent (void **ptr, void *obj, Sge
                }
        }
 }
-#endif
 
 static long long
 major_get_and_reset_num_major_objects_marked (void)
@@ -987,78 +909,50 @@ major_get_and_reset_num_major_objects_marked (void)
 #endif
 }
 
-#define USE_PREFETCH_QUEUE     0       /* BOOL FASTENABLE */
-#if !USE_PREFETCH_QUEUE
-#undef USE_PREFETCH_QUEUE
+#define PREFETCH_CARDS         1       /* BOOL FASTENABLE */
+#if !PREFETCH_CARDS
+#undef PREFETCH_CARDS
 #endif
 
-#define DESCRIPTOR_FAST_PATH   1       /* BOOL FASTENABLE */
-#if !DESCRIPTOR_FAST_PATH
-#undef DESCRIPTOR_FAST_PATH
+/* gcc 4.2.1 from xcode4 crashes on sgen_card_table_get_card_address () when this is enabled */
+#if defined(PLATFORM_MACOSX)
+#define GCC_VERSION (__GNUC__ * 10000 \
+                               + __GNUC_MINOR__ * 100 \
+                               + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION <= 40300
+#undef PREFETCH_CARDS
 #endif
-
-#ifdef HEAVY_STATISTICS
-static long long stat_optimized_copy;
-static long long stat_optimized_copy_nursery;
-static long long stat_optimized_copy_nursery_forwarded;
-static long long stat_optimized_copy_nursery_pinned;
-#ifdef SGEN_MARK_ON_ENQUEUE
-static long long stat_optimized_copy_major;
-static long long stat_optimized_copy_major_small_fast;
-static long long stat_optimized_copy_major_small_slow;
-static long long stat_optimized_copy_major_large;
-#endif
-static long long stat_optimized_major_mark;
-static long long stat_optimized_major_mark_small;
-static long long stat_optimized_major_mark_large;
-static long long stat_optimized_major_scan;
-static long long stat_optimized_major_scan_no_refs;
-#ifdef DESCRIPTOR_FAST_PATH
-static long long stat_optimized_major_scan_fast;
-static long long stat_optimized_major_scan_slow;
 #endif
 
-static long long stat_drain_prefetch_fills;
-static long long stat_drain_prefetch_fill_failures;
-static long long stat_drain_loops;
-#endif
-
-static inline void
-sgen_gray_object_dequeue_fast (SgenGrayQueue *queue, char** obj, mword *desc) {
-       GrayQueueEntry *cursor = queue->prefetch_cursor;
-       GrayQueueEntry *const end = queue->prefetch + SGEN_GRAY_QUEUE_PREFETCH_SIZE;
-       *obj = cursor->obj;
-#ifdef SGEN_GRAY_QUEUE_HAVE_DESCRIPTORS
-       *desc = cursor->desc;
-       GRAY_OBJECT_DEQUEUE (queue, &cursor->obj, &cursor->desc);
-#else
-       GRAY_OBJECT_DEQUEUE (queue, &cursor->obj, NULL);
-#endif
-
-#if !defined (SGEN_MARK_ON_ENQUEUE) && defined (BLOCK_INFO_IN_HEADER)
-       {
-               int word, bit;
-               MSBlockInfo *block = (MSBlockInfo*)MS_BLOCK_DATA_FOR_OBJ (cursor->obj);
-               MS_CALC_MARK_BIT (word, bit, cursor->obj);
-               PREFETCH_WRITE (&block->mark_words [word]);
-       }
+#ifdef HEAVY_STATISTICS
+static guint64 stat_optimized_copy;
+static guint64 stat_optimized_copy_nursery;
+static guint64 stat_optimized_copy_nursery_forwarded;
+static guint64 stat_optimized_copy_nursery_pinned;
+static guint64 stat_optimized_copy_major;
+static guint64 stat_optimized_copy_major_small_fast;
+static guint64 stat_optimized_copy_major_small_slow;
+static guint64 stat_optimized_copy_major_large;
+static guint64 stat_optimized_copy_major_forwarded;
+static guint64 stat_optimized_copy_major_small_evacuate;
+static guint64 stat_optimized_major_scan;
+static guint64 stat_optimized_major_scan_no_refs;
+
+static guint64 stat_drain_prefetch_fills;
+static guint64 stat_drain_prefetch_fill_failures;
+static guint64 stat_drain_loops;
 #endif
 
-       PREFETCH_READ (cursor->obj);
-       ++cursor;
-       if (cursor == end)
-               cursor = queue->prefetch;
-       queue->prefetch_cursor = cursor;
-}
-
-static void major_scan_object (char *start, mword desc, SgenGrayQueue *queue);
+static void major_scan_object_with_evacuation (char *start, mword desc, SgenGrayQueue *queue);
 
 #define COPY_OR_MARK_FUNCTION_NAME     major_copy_or_mark_object_no_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME      major_scan_object_no_evacuation
 #define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_no_evacuation
 #include "sgen-marksweep-drain-gray-stack.h"
 
 #define COPY_OR_MARK_WITH_EVACUATION
 #define COPY_OR_MARK_FUNCTION_NAME     major_copy_or_mark_object_with_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME      major_scan_object_with_evacuation
 #define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_with_evacuation
 #include "sgen-marksweep-drain-gray-stack.h"
 
@@ -1066,7 +960,8 @@ static gboolean
 drain_gray_stack (ScanCopyContext ctx)
 {
        gboolean evacuation = FALSE;
-       for (int i = 0; i < num_block_obj_sizes; ++i) {
+       int i;
+       for (i = 0; i < num_block_obj_sizes; ++i) {
                if (evacuate_block_obj_sizes [i]) {
                        evacuation = TRUE;
                        break;
@@ -1079,13 +974,7 @@ drain_gray_stack (ScanCopyContext ctx)
                return drain_gray_stack_no_evacuation (ctx);
 }
 
-#include "sgen-major-scan-object.h"
-
-#ifdef SGEN_HAVE_CONCURRENT_MARK
-#define SCAN_FOR_CONCURRENT_MARK
-#include "sgen-major-scan-object.h"
-#undef SCAN_FOR_CONCURRENT_MARK
-#endif
+#include "sgen-marksweep-scan-object-concurrent.h"
 
 static void
 major_copy_or_mark_object_canonical (void **ptr, SgenGrayQueue *queue)
@@ -1093,27 +982,25 @@ major_copy_or_mark_object_canonical (void **ptr, SgenGrayQueue *queue)
        major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
 }
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 static void
 major_copy_or_mark_object_concurrent_canonical (void **ptr, SgenGrayQueue *queue)
 {
        major_copy_or_mark_object_with_evacuation_concurrent (ptr, *ptr, queue);
 }
-#endif
 
 static void
-mark_pinned_objects_in_block (MSBlockInfo *block, SgenGrayQueue *queue)
+mark_pinned_objects_in_block (MSBlockInfo *block, size_t first_entry, size_t last_entry, SgenGrayQueue *queue)
 {
        void **entry, **end;
        int last_index = -1;
 
-       if (block->pin_queue_first_entry == block->pin_queue_last_entry)
+       if (first_entry == last_entry)
                return;
 
        block->has_pinned = TRUE;
 
-       entry = sgen_pinning_get_entry (block->pin_queue_first_entry);
-       end = sgen_pinning_get_entry (block->pin_queue_last_entry);
+       entry = sgen_pinning_get_entry (first_entry);
+       end = sgen_pinning_get_entry (last_entry);
 
        for (; entry < end; ++entry) {
                int index = MS_BLOCK_OBJ_INDEX (*entry, block);
@@ -1226,7 +1113,7 @@ bitcount (mword d)
 }
 
 static void
-ms_sweep (void)
+major_sweep (void)
 {
        int i;
        MSBlockInfo *block;
@@ -1236,10 +1123,8 @@ ms_sweep (void)
        int *slots_used = alloca (sizeof (int) * num_block_obj_sizes);
        int *num_blocks = alloca (sizeof (int) * num_block_obj_sizes);
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        mword total_evacuate_heap = 0;
        mword total_evacuate_saved = 0;
-#endif
 
        for (i = 0; i < num_block_obj_sizes; ++i)
                slots_available [i] = slots_used [i] = num_blocks [i] = 0;
@@ -1271,12 +1156,10 @@ ms_sweep (void)
 
                count = MS_BLOCK_FREE / block->obj_size;
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
                if (block->cardtable_mod_union) {
                        sgen_free_internal_dynamic (block->cardtable_mod_union, CARDS_PER_BLOCK, INTERNAL_MEM_CARDTABLE_MOD_UNION);
                        block->cardtable_mod_union = NULL;
                }
-#endif
 
                /* Count marked objects in the block */
                for (i = 0; i < MS_NUM_MARK_WORDS; ++i) {
@@ -1318,12 +1201,7 @@ ms_sweep (void)
                        DELETE_BLOCK_IN_FOREACH ();
 
                        binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
-#if defined (FIXED_HEAP) || defined (BLOCK_INFO_IN_HEADER)
                        ms_free_block (block);
-#else
-                       ms_free_block (block->block);
-                       sgen_free_internal (block, INTERNAL_MEM_MS_BLOCK_INFO);
-#endif
 
                        --num_major_sections;
                }
@@ -1341,27 +1219,23 @@ ms_sweep (void)
                } else {
                        evacuate_block_obj_sizes [i] = FALSE;
                }
-#ifdef SGEN_HAVE_CONCURRENT_MARK
                {
                        mword total_bytes = block_obj_sizes [i] * slots_available [i];
                        total_evacuate_heap += total_bytes;
                        if (evacuate_block_obj_sizes [i])
                                total_evacuate_saved += total_bytes - block_obj_sizes [i] * slots_used [i];
                }
-#endif
        }
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        want_evacuation = (float)total_evacuate_saved / (float)total_evacuate_heap > (1 - concurrent_evacuation_threshold);
-#endif
 
        have_swept = TRUE;
 }
 
-static void
-major_sweep (void)
+static gboolean
+major_have_finished_sweeping (void)
 {
-       ms_sweep ();
+       return have_swept;
 }
 
 static int count_pinned_ref;
@@ -1415,12 +1289,26 @@ count_ref_nonref_objs (void)
 static int
 ms_calculate_block_obj_sizes (double factor, int *arr)
 {
-       double target_size = sizeof (MonoObject);
+       double target_size;
        int num_sizes = 0;
        int last_size = 0;
 
+       /*
+        * Have every possible slot size starting with the minimal
+        * object size up to and including four times that size.  Then
+        * proceed by increasing geometrically with the given factor.
+        */
+
+       for (int size = sizeof (MonoObject); size <= 4 * sizeof (MonoObject); size += SGEN_ALLOC_ALIGN) {
+               if (arr)
+                       arr [num_sizes] = size;
+               ++num_sizes;
+               last_size = size;
+       }
+       target_size = (double)last_size;
+
        do {
-               int target_count = (int)ceil (MS_BLOCK_FREE / target_size);
+               int target_count = (int)floor (MS_BLOCK_FREE / target_size);
                int size = MIN ((MS_BLOCK_FREE / target_count) & ~(SGEN_ALLOC_ALIGN - 1), SGEN_MAX_SMALL_OBJ_SIZE);
 
                if (size != last_size) {
@@ -1484,6 +1372,9 @@ major_start_major_collection (void)
 
                MONO_GC_SWEEP_END (GENERATION_OLD, TRUE);
        }
+
+       SGEN_ASSERT (0, have_swept, "Cannot start major collection without having finished sweeping");
+       have_swept = FALSE;
 }
 
 static void
@@ -1514,7 +1405,7 @@ compare_pointers (const void *va, const void *vb) {
 #endif
 
 static void
-major_have_computer_minor_collection_allowance (void)
+major_free_swept_blocks (void)
 {
        size_t section_reserve = sgen_get_minor_collection_allowance () / MS_BLOCK_SIZE;
 
@@ -1667,24 +1558,17 @@ major_have_computer_minor_collection_allowance (void)
        }
 }
 
-static void
-major_find_pin_queue_start_ends (SgenGrayQueue *queue)
-{
-       MSBlockInfo *block;
-
-       FOREACH_BLOCK (block) {
-               sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
-                               &block->pin_queue_first_entry, &block->pin_queue_last_entry);
-       } END_FOREACH_BLOCK;
-}
-
 static void
 major_pin_objects (SgenGrayQueue *queue)
 {
        MSBlockInfo *block;
 
        FOREACH_BLOCK (block) {
-               mark_pinned_objects_in_block (block, queue);
+               size_t first_entry, last_entry;
+               SGEN_ASSERT (0, block->swept, "All blocks must be swept when we're pinning.");
+               sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
+                               &first_entry, &last_entry);
+               mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
        } END_FOREACH_BLOCK;
 }
 
@@ -1768,10 +1652,10 @@ major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
 }
 
 #ifdef HEAVY_STATISTICS
-extern long long marked_cards;
-extern long long scanned_cards;
-extern long long scanned_objects;
-extern long long remarked_cards;
+extern guint64 marked_cards;
+extern guint64 scanned_cards;
+extern guint64 scanned_objects;
+extern guint64 remarked_cards;
 #endif
 
 #define CARD_WORDS_PER_BLOCK (CARDS_PER_BLOCK / SIZEOF_VOID_P)
@@ -1810,19 +1694,16 @@ initial_skip_card (guint8 *card_data)
 #endif
 }
 
-
-static G_GNUC_UNUSED guint8*
-skip_card (guint8 *card_data, guint8 *card_data_end)
-{
-       while (card_data < card_data_end && !*card_data)
-               ++card_data;
-       return card_data;
-}
-
 #define MS_BLOCK_OBJ_INDEX_FAST(o,b,os)        (((char*)(o) - ((b) + MS_BLOCK_SKIP)) / (os))
 #define MS_BLOCK_OBJ_FAST(b,os,i)                      ((b) + MS_BLOCK_SKIP + (os) * (i))
 #define MS_OBJ_ALLOCED_FAST(o,b)               (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + MS_BLOCK_SIZE))
 
+static size_t
+card_offset (char *obj, char *base)
+{
+       return (obj - base) >> CARD_BITS;
+}
+
 static void
 major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
 {
@@ -1830,156 +1711,138 @@ major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
        gboolean has_references;
        ScanObjectFunc scan_func = sgen_get_current_object_ops ()->scan_object;
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        if (!concurrent_mark)
                g_assert (!mod_union);
-#else
-       g_assert (!mod_union);
-#endif
 
        FOREACH_BLOCK_HAS_REFERENCES (block, has_references) {
+#ifndef SGEN_HAVE_OVERLAPPING_CARDS
+               guint8 cards_copy [CARDS_PER_BLOCK];
+#endif
+               gboolean small_objects;
                int block_obj_size;
                char *block_start;
+               guint8 *card_data, *card_base;
+               guint8 *card_data_end;
+               char *scan_front = NULL;
+
+#ifdef PREFETCH_CARDS
+               int prefetch_index = __index + 6;
+               if (prefetch_index < allocated_blocks.next_slot) {
+                       MSBlockInfo *prefetch_block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [prefetch_index]);
+                       guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
+                       PREFETCH_READ (prefetch_block);
+                       PREFETCH_WRITE (prefetch_cards);
+                       PREFETCH_WRITE (prefetch_cards + 32);
+                }
+#endif
 
                if (!has_references)
                        continue;
 
                block_obj_size = block->obj_size;
-               block_start = MS_BLOCK_FOR_BLOCK_INFO (block);
+               small_objects = block_obj_size < CARD_SIZE_IN_BYTES;
 
-               if (block_obj_size >= CARD_SIZE_IN_BYTES) {
-                       guint8 *cards;
-#ifndef SGEN_HAVE_OVERLAPPING_CARDS
-                       guint8 cards_data [CARDS_PER_BLOCK];
-#endif
-                       char *obj, *end, *base;
+               block_start = MS_BLOCK_FOR_BLOCK_INFO (block);
 
-                       if (mod_union) {
-#ifdef SGEN_HAVE_CONCURRENT_MARK
-                               cards = block->cardtable_mod_union;
-                               /*
-                                * This happens when the nursery
-                                * collection that precedes finishing
-                                * the concurrent collection allocates
-                                * new major blocks.
-                                */
-                               if (!cards)
-                                       continue;
-#endif
-                       } else {
-                       /*We can avoid the extra copy since the remark cardtable was cleaned before */
+               /*
+                * This is safe in the face of card aliasing for the following reason:
+                *
+                * Major blocks are aligned to MS_BLOCK_SIZE (16k, i.e. 32 cards, by default).
+                * Card aliasing happens in powers of two, so as long as major blocks are aligned to
+                * their sizes, they won't overflow the cardtable overlap modulus.
+                */
+               if (mod_union) {
+                       card_data = card_base = block->cardtable_mod_union;
+                       /*
+                        * This happens when the nursery collection that precedes finishing
+                        * the concurrent collection allocates new major blocks.
+                        */
+                       if (!card_data)
+                               continue;
+               } else {
 #ifdef SGEN_HAVE_OVERLAPPING_CARDS
-                               cards = sgen_card_table_get_card_scan_address ((mword)block_start);
+                       card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
 #else
-                               cards = cards_data;
-                               if (!sgen_card_table_get_card_data (cards_data, (mword)block_start, CARDS_PER_BLOCK))
-                                       continue;
+                       if (!sgen_card_table_get_card_data (cards_copy, (mword)block_start, CARDS_PER_BLOCK))
+                               continue;
+                       card_data = card_base = cards_copy;
 #endif
-                       }
-
-                       obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, 0);
-                       end = block_start + MS_BLOCK_SIZE;
-                       base = sgen_card_table_align_pointer (obj);
-
-                       while (obj < end) {
-                               size_t card_offset;
-
-                               if (!block->swept)
-                                       sweep_block (block, FALSE);
-
-                               if (!MS_OBJ_ALLOCED_FAST (obj, block_start))
-                                       goto next_large;
+               }
+               card_data_end = card_data + CARDS_PER_BLOCK;
 
-                               if (mod_union) {
-                                       /* FIXME: do this more efficiently */
-                                       int w, b;
-                                       MS_CALC_MARK_BIT (w, b, obj);
-                                       if (!MS_MARK_BIT (block, w, b))
-                                               goto next_large;
-                               }
+               card_data += MS_BLOCK_SKIP >> CARD_BITS;
 
-                               card_offset = (obj - base) >> CARD_BITS;
-                               sgen_cardtable_scan_object (obj, block_obj_size, cards + card_offset, mod_union, queue);
+               card_data = initial_skip_card (card_data);
+               while (card_data < card_data_end) {
+                       size_t card_index, first_object_index;
+                       char *start;
+                       char *end;
+                       char *first_obj, *obj;
 
-                       next_large:
-                               obj += block_obj_size;
-                       }
-               } else {
-                       guint8 *card_data, *card_base;
-                       guint8 *card_data_end;
+                       HEAVY_STAT (++scanned_cards);
 
-                       /*
-                        * This is safe in face of card aliasing for the following reason:
-                        *
-                        * Major blocks are 16k aligned, or 32 cards aligned.
-                        * Cards aliasing happens in powers of two, so as long as major blocks are aligned to their
-                        * sizes, they won't overflow the cardtable overlap modulus.
-                        */
-                       if (mod_union) {
-#ifdef SGEN_HAVE_CONCURRENT_MARK
-                               card_data = card_base = block->cardtable_mod_union;
-                               /*
-                                * This happens when the nursery
-                                * collection that precedes finishing
-                                * the concurrent collection allocates
-                                * new major blocks.
-                                */
-                               if (!card_data)
-                                       continue;
-#else
-                               g_assert_not_reached ();
-                               card_data = NULL;
-#endif
-                       } else {
-                               card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
+                       if (!*card_data) {
+                               ++card_data;
+                               continue;
                        }
-                       card_data_end = card_data + CARDS_PER_BLOCK;
 
-                       for (card_data = initial_skip_card (card_data); card_data < card_data_end; ++card_data) { //card_data = skip_card (card_data + 1, card_data_end)) {
-                               size_t index;
-                               size_t idx = card_data - card_base;
-                               char *start = (char*)(block_start + idx * CARD_SIZE_IN_BYTES);
-                               char *end = start + CARD_SIZE_IN_BYTES;
-                               char *first_obj, *obj;
-
-                               HEAVY_STAT (++scanned_cards);
-
-                               if (!*card_data)
-                                       continue;
+                       card_index = card_data - card_base;
+                       start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
+                       end = start + CARD_SIZE_IN_BYTES;
 
-                               if (!block->swept)
-                                       sweep_block (block, FALSE);
+                       if (!block->swept)
+                               sweep_block (block, FALSE);
 
-                               HEAVY_STAT (++marked_cards);
+                       HEAVY_STAT (++marked_cards);
 
+                       if (small_objects)
                                sgen_card_table_prepare_card_for_scanning (card_data);
 
-                               if (idx == 0)
-                                       index = 0;
-                               else
-                                       index = MS_BLOCK_OBJ_INDEX_FAST (start, block_start, block_obj_size);
+                       /*
+                        * If the card we're looking at starts at or in the block header, we
+                        * must start at the first object in the block, without calculating
+                        * the index of the object we're hypothetically starting at, because
+                        * it would be negative.
+                        */
+                       if (card_index <= (MS_BLOCK_SKIP >> CARD_BITS))
+                               first_object_index = 0;
+                       else
+                               first_object_index = MS_BLOCK_OBJ_INDEX_FAST (start, block_start, block_obj_size);
 
-                               obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, index);
-                               while (obj < end) {
-                                       if (!MS_OBJ_ALLOCED_FAST (obj, block_start))
-                                               goto next_small;
+                       obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, first_object_index);
 
-                                       if (mod_union) {
-                                               /* FIXME: do this more efficiently */
-                                               int w, b;
-                                               MS_CALC_MARK_BIT (w, b, obj);
-                                               if (!MS_MARK_BIT (block, w, b))
-                                                       goto next_small;
-                                       }
+                       while (obj < end) {
+                               if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
+                                       goto next_object;
 
+                               if (mod_union) {
+                                       /* FIXME: do this more efficiently */
+                                       int w, b;
+                                       MS_CALC_MARK_BIT (w, b, obj);
+                                       if (!MS_MARK_BIT (block, w, b))
+                                               goto next_object;
+                               }
+
+                               if (small_objects) {
                                        HEAVY_STAT (++scanned_objects);
                                        scan_func (obj, sgen_obj_get_descriptor (obj), queue);
-                               next_small:
-                                       obj += block_obj_size;
+                               } else {
+                                       size_t offset = card_offset (obj, block_start);
+                                       sgen_cardtable_scan_object (obj, block_obj_size, card_base + offset, mod_union, queue);
                                }
-                               HEAVY_STAT (if (*card_data) ++remarked_cards);
-                               binary_protocol_card_scan (first_obj, obj - first_obj);
+                       next_object:
+                               obj += block_obj_size;
+                               g_assert (scan_front <= obj);
+                               scan_front = obj;
                        }
+
+                       HEAVY_STAT (if (*card_data) ++remarked_cards);
+                       binary_protocol_card_scan (first_obj, obj - first_obj);
+
+                       if (small_objects)
+                               ++card_data;
+                       else
+                               card_data = card_base + card_offset (obj, block_start);
                }
        } END_FOREACH_BLOCK;
 }
@@ -2010,7 +1873,6 @@ major_count_cards (long long *num_total_cards, long long *num_marked_cards)
        *num_marked_cards = marked_cards;
 }
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 static void
 update_cardtable_mod_union (void)
 {
@@ -2030,9 +1892,9 @@ static guint8*
 major_get_cardtable_mod_union_for_object (char *obj)
 {
        MSBlockInfo *block = MS_BLOCK_FOR_OBJ (obj);
-       return &block->cardtable_mod_union [(obj - (char*)sgen_card_table_align_pointer (MS_BLOCK_FOR_BLOCK_INFO (block))) >> CARD_BITS];
+       size_t offset = card_offset (obj, (char*)sgen_card_table_align_pointer (MS_BLOCK_FOR_BLOCK_INFO (block)));
+       return &block->cardtable_mod_union [offset];
 }
-#endif
 
 static void
 alloc_free_block_lists (MSBlockInfo ***lists)
@@ -2050,12 +1912,8 @@ post_param_init (SgenMajorCollector *collector)
        collector->sweeps_lazily = lazy_sweep;
 }
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 static void
 sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurrent)
-#else // SGEN_HAVE_CONCURRENT_MARK
-#error unknown configuration
-#endif // SGEN_HAVE_CONCURRENT_MARK
 {
        int i;
 
@@ -2085,35 +1943,30 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i)
                g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i));
 
-       mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_alloced);
-       mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed);
-       mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_lazy_swept);
-       mono_counters_register ("# major objects evacuated", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_objects_evacuated);
+       mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced);
+       mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed);
+       mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept);
+       mono_counters_register ("# major objects evacuated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_objects_evacuated);
 #if SIZEOF_VOID_P != 8
-       mono_counters_register ("# major blocks freed ideally", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed_ideal);
-       mono_counters_register ("# major blocks freed less ideally", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed_less_ideal);
-       mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed_individual);
-       mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_alloced_less_ideal);
+       mono_counters_register ("# major blocks freed ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_ideal);
+       mono_counters_register ("# major blocks freed less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_less_ideal);
+       mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual);
+       mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal);
 #endif
 
        collector->section_size = MAJOR_SECTION_SIZE;
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        concurrent_mark = is_concurrent;
        if (is_concurrent) {
                collector->is_concurrent = TRUE;
                collector->want_synchronous_collection = &want_evacuation;
-       } else
-#endif
-       {
+       } else {
                collector->is_concurrent = FALSE;
                collector->want_synchronous_collection = NULL;
        }
        collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
        collector->supports_cardtable = TRUE;
 
-       collector->have_swept = &have_swept;
-
        collector->alloc_heap = major_alloc_heap;
        collector->is_object_live = major_is_object_live;
        collector->alloc_small_pinned_obj = major_alloc_small_pinned_obj;
@@ -2123,19 +1976,18 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->free_pinned_object = free_pinned_object;
        collector->iterate_objects = major_iterate_objects;
        collector->free_non_pinned_object = major_free_non_pinned_object;
-       collector->find_pin_queue_start_ends = major_find_pin_queue_start_ends;
        collector->pin_objects = major_pin_objects;
        collector->pin_major_object = pin_major_object;
        collector->scan_card_table = major_scan_card_table;
        collector->iterate_live_block_ranges = (void*)(void*) major_iterate_live_block_ranges;
-#ifdef SGEN_HAVE_CONCURRENT_MARK
        if (is_concurrent) {
                collector->update_cardtable_mod_union = update_cardtable_mod_union;
                collector->get_cardtable_mod_union_for_object = major_get_cardtable_mod_union_for_object;
        }
-#endif
        collector->init_to_space = major_init_to_space;
        collector->sweep = major_sweep;
+       collector->have_finished_sweeping = major_have_finished_sweeping;
+       collector->free_swept_blocks = major_free_swept_blocks;
        collector->check_scan_starts = major_check_scan_starts;
        collector->dump_heap = major_dump_heap;
        collector->get_used_size = major_get_used_size;
@@ -2143,7 +1995,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->finish_nursery_collection = major_finish_nursery_collection;
        collector->start_major_collection = major_start_major_collection;
        collector->finish_major_collection = major_finish_major_collection;
-       collector->have_computed_minor_collection_allowance = major_have_computer_minor_collection_allowance;
        collector->ptr_is_in_non_pinned_space = major_ptr_is_in_non_pinned_space;
        collector->obj_is_from_pinned_alloc = obj_is_from_pinned_alloc;
        collector->report_pinned_memory_usage = major_report_pinned_memory_usage;
@@ -2156,14 +2007,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->count_cards = major_count_cards;
 
        collector->major_ops.copy_or_mark_object = major_copy_or_mark_object_canonical;
-       collector->major_ops.scan_object = major_scan_object;
-#ifdef SGEN_HAVE_CONCURRENT_MARK
+       collector->major_ops.scan_object = major_scan_object_with_evacuation;
        if (is_concurrent) {
                collector->major_concurrent_ops.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical;
-               collector->major_concurrent_ops.scan_object = major_scan_object_concurrent;
+               collector->major_concurrent_ops.scan_object = major_scan_object_no_mark_concurrent;
                collector->major_concurrent_ops.scan_vtype = major_scan_vtype_concurrent;
        }
-#endif
 
 #if !defined (FIXED_HEAP) && !defined (SGEN_PARALLEL_MARK)
        /* FIXME: this will not work with evacuation or the split nursery. */
@@ -2171,33 +2020,24 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
                collector->drain_gray_stack = drain_gray_stack;
 
 #ifdef HEAVY_STATISTICS
-       mono_counters_register ("Optimized copy", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy);
-       mono_counters_register ("Optimized copy nursery", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_nursery);
-       mono_counters_register ("Optimized copy nursery forwarded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_nursery_forwarded);
-       mono_counters_register ("Optimized copy nursery pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_nursery_pinned);
-#ifdef SGEN_MARK_ON_ENQUEUE
-       mono_counters_register ("Optimized copy major", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_major);
-       mono_counters_register ("Optimized copy major small fast", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_major_small_fast);
-       mono_counters_register ("Optimized copy major small slow", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_major_small_slow);
-       mono_counters_register ("Optimized copy major large", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_major_large);
+       mono_counters_register ("Optimized copy", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy);
+       mono_counters_register ("Optimized copy nursery", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery);
+       mono_counters_register ("Optimized copy nursery forwarded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_forwarded);
+       mono_counters_register ("Optimized copy nursery pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_pinned);
+       mono_counters_register ("Optimized copy major", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major);
+       mono_counters_register ("Optimized copy major small fast", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_fast);
+       mono_counters_register ("Optimized copy major small slow", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_slow);
+       mono_counters_register ("Optimized copy major large", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_large);
+       mono_counters_register ("Optimized major scan", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan);
+       mono_counters_register ("Optimized major scan no refs", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan_no_refs);
+
+       mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_loops);
+       mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fills);
+       mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fill_failures);
 #endif
-       mono_counters_register ("Optimized major mark", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_mark);
-       mono_counters_register ("Optimized major mark small", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_mark_small);
-       mono_counters_register ("Optimized major mark large", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_mark_large);
-       mono_counters_register ("Optimized major scan", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_scan);
-       mono_counters_register ("Optimized major scan no refs", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_scan_no_refs);
-#ifdef DESCRIPTOR_FAST_PATH
-       mono_counters_register ("Optimized major scan slow", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_scan_slow);
-       mono_counters_register ("Optimized major scan fast", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_scan_fast);
 #endif
 
-       mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_loops);
-       mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_prefetch_fills);
-       mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_prefetch_fill_failures);
-#endif
-#endif
-
-#ifdef HEAVY_STATISTICS
+#ifdef SGEN_HEAVY_BINARY_PROTOCOL
        mono_mutex_init (&scanned_objects_list_lock);
 #endif
 
@@ -2207,7 +2047,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        g_assert ((MS_BLOCK_SIZE % (8 * CARD_SIZE_IN_BYTES)) == 0);
 }
 
-#ifdef SGEN_HAVE_CONCURRENT_MARK
 void
 sgen_marksweep_init (SgenMajorCollector *collector)
 {
@@ -2219,6 +2058,5 @@ sgen_marksweep_conc_init (SgenMajorCollector *collector)
 {
        sgen_marksweep_init_internal (collector, TRUE);
 }
-#endif
 
 #endif