Merge remote-tracking branch 'mfoliveira/ppc64el-v2'
diff --git a/mono/metadata/sgen-marksweep.c b/mono/metadata/sgen-marksweep.c
index b05cf9e4e02d6b9ac9f72fe8d844d7bbf5edd8c8..88664226b9abdcec1108362a26abe1f9a24b2525 100644
 
 #define SGEN_HAVE_CONCURRENT_MARK
 
-#define MS_BLOCK_SIZE  (16*1024)
-#define MS_BLOCK_SIZE_SHIFT    14
+#if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
+#define MS_BLOCK_SIZE  ARCH_MIN_MS_BLOCK_SIZE
+#define MS_BLOCK_SIZE_SHIFT    ARCH_MIN_MS_BLOCK_SIZE_SHIFT
+#else
+#define MS_BLOCK_SIZE_SHIFT     14      /* INT FASTENABLE */
+#define MS_BLOCK_SIZE           (1 << MS_BLOCK_SIZE_SHIFT)
+#endif
 #define MAJOR_SECTION_SIZE     MS_BLOCK_SIZE
 #define CARDS_PER_BLOCK (MS_BLOCK_SIZE / CARD_SIZE_IN_BYTES)
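
/*
 * Hedged sketch: an architecture header (e.g. sgen-archdep.h) can now raise
 * the mark&sweep block size, which matters on targets such as ppc64el where
 * the kernel page size is commonly 64 KiB.  The values below are illustrative
 * assumptions, not part of this patch; the two must stay consistent, i.e.
 * ARCH_MIN_MS_BLOCK_SIZE == 1 << ARCH_MIN_MS_BLOCK_SIZE_SHIFT.
 */
#define ARCH_MIN_MS_BLOCK_SIZE         (64*1024)
#define ARCH_MIN_MS_BLOCK_SIZE_SHIFT   16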
 
  * of a block is the MSBlockHeader, then optional padding, then come
  * the objects, so this must be >= sizeof (MSBlockHeader).
  */
-#define MS_BLOCK_SKIP  16
+#define MS_BLOCK_SKIP  ((sizeof (MSBlockHeader) + 15) & ~15)
 
 #define MS_BLOCK_FREE  (MS_BLOCK_SIZE - MS_BLOCK_SKIP)
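
/*
 * Worked example of the align-up above (a standalone illustration; the 72-byte
 * header size is an assumption, the real sizeof (MSBlockHeader) depends on
 * MS_NUM_MARK_WORDS and the word size): rounding the header up to 16 bytes is
 * needed now that the full MSBlockInfo lives at the block start, so a fixed
 * skip of 16 bytes would no longer be enough.
 */
#include <stdio.h>

#define ALIGN_UP_16(x)  (((x) + 15) & ~(size_t)15)

int
main (void)
{
	printf ("%zu\n", ALIGN_UP_16 ((size_t)72));   /* rounds up to 80 */
	printf ("%zu\n", ALIGN_UP_16 ((size_t)80));   /* already aligned: 80 */
	return 0;
}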
 
 #define MS_NUM_MARK_WORDS      ((MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (mword) * 8 - 1) / (sizeof (mword) * 8))
 
-#if SGEN_MAX_SMALL_OBJ_SIZE > MS_BLOCK_FREE / 2
-#error MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2
-#endif
-
 typedef struct _MSBlockInfo MSBlockInfo;
 struct _MSBlockInfo {
        int obj_size;
@@ -81,7 +82,6 @@ struct _MSBlockInfo {
        unsigned int has_pinned : 1;    /* means cannot evacuate */
        unsigned int is_to_space : 1;
        unsigned int swept : 1;
-       char *block;
        void **free_list;
        MSBlockInfo *next_free;
        size_t pin_queue_first_entry;
@@ -92,17 +92,17 @@ struct _MSBlockInfo {
        mword mark_words [MS_NUM_MARK_WORDS];
 };
 
-#define MS_BLOCK_FOR_BLOCK_INFO(b)     ((b)->block)
+#define MS_BLOCK_FOR_BLOCK_INFO(b)     ((char*)(b))
 
 #define MS_BLOCK_OBJ(b,i)              (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i))
 #define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size)            (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i))
 #define MS_BLOCK_DATA_FOR_OBJ(o)       ((char*)((mword)(o) & ~(mword)(MS_BLOCK_SIZE - 1)))
 
 typedef struct {
-       MSBlockInfo *info;
+       MSBlockInfo info;
 } MSBlockHeader;
 
-#define MS_BLOCK_FOR_OBJ(o)            (((MSBlockHeader*)MS_BLOCK_DATA_FOR_OBJ ((o)))->info)
+#define MS_BLOCK_FOR_OBJ(o)            (&((MSBlockHeader*)MS_BLOCK_DATA_FOR_OBJ ((o)))->info)
 
 /* object index will always be small */
 #define MS_BLOCK_OBJ_INDEX(o,b)        ((int)(((char*)(o) - (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP)) / (b)->obj_size))
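
/*
 * Minimal sketch of the new block layout (simplified, standalone types; the
 * real structures are the MSBlockInfo/MSBlockHeader above): the block info is
 * now embedded at the start of the block itself, so MS_BLOCK_FOR_BLOCK_INFO
 * degenerates to a cast and MS_BLOCK_FOR_OBJ just masks an interior pointer
 * down to the block boundary.
 */
#include <stdint.h>

enum { BLOCK_SIZE = 16 * 1024 };                 /* assumed MS_BLOCK_SIZE */

typedef struct { int obj_size; } block_info_t;   /* stands in for MSBlockInfo */
typedef struct { block_info_t info; } block_header_t;

block_info_t *
block_info_for_obj (void *obj)
{
	/* objects never span blocks, so masking the low bits of any interior
	 * pointer yields the block start, which is also the header/info */
	uintptr_t block = (uintptr_t)obj & ~(uintptr_t)(BLOCK_SIZE - 1);
	return &((block_header_t *)block)->info;
}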
@@ -124,7 +124,7 @@ typedef struct {
 
 #define MS_OBJ_ALLOCED(o,b)    (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE))
 
-#define MS_BLOCK_OBJ_SIZE_FACTOR       (sqrt (2.0))
+#define MS_BLOCK_OBJ_SIZE_FACTOR       (pow (2.0, 1.0 / 3))
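
/*
 * Illustration of the new growth factor (standalone; the 16-byte starting
 * size is an assumption): moving from sqrt(2) (~1.414) to 2^(1/3) (~1.26)
 * produces more, finer-grained block object size classes, which costs a few
 * extra free lists but wastes less space per allocated object.
 */
#include <math.h>
#include <stdio.h>

int
main (void)
{
	double size = 16.0;
	double factor = pow (2.0, 1.0 / 3);           /* ~1.2599 */
	for (int i = 0; i < 6; ++i) {
		printf ("class %d: ~%.0f bytes\n", i, size);
		size *= factor;                       /* ~16, 20, 25, 32, 40, 51 */
	}
	return 0;
}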
 
 /*
  * This way we can lookup block object size indexes for sizes up to
@@ -142,9 +142,9 @@ static int fast_block_obj_size_indexes [MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES];
 #define MS_BLOCK_TYPE_MAX      4
 
 static gboolean *evacuate_block_obj_sizes;
-static float evacuation_threshold = 0.0f;
+static float evacuation_threshold = 0.666f;
 #ifdef SGEN_HAVE_CONCURRENT_MARK
-static float concurrent_evacuation_threshold = 0.0f;
+static float concurrent_evacuation_threshold = 0.666f;
 static gboolean want_evacuation = FALSE;
 #endif
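
/*
 * Quick illustration of how this default is consumed at sweep time (see
 * ms_sweep below; the slot counts here are made up): a size class with more
 * than 5 blocks whose live-slot usage drops under roughly two thirds gets
 * flagged for evacuation at the next major collection.
 */
#include <stdio.h>

int
main (void)
{
	float evacuation_threshold = 0.666f;
	int num_blocks = 8, slots_available = 300, slots_used = 120;
	float usage = (float)slots_used / (float)slots_available;  /* 0.40 */
	int evacuate = num_blocks > 5 && usage < evacuation_threshold;
	printf ("evacuate: %d\n", evacuate);                       /* 1 */
	return 0;
}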
 
@@ -177,25 +177,41 @@ static size_t num_major_sections = 0;
 /* one free block list for each block object size */
 static MSBlockInfo **free_block_lists [MS_BLOCK_TYPE_MAX];
 
-static long long stat_major_blocks_alloced = 0;
-static long long stat_major_blocks_freed = 0;
-static long long stat_major_blocks_lazy_swept = 0;
-static long long stat_major_objects_evacuated = 0;
+static guint64 stat_major_blocks_alloced = 0;
+static guint64 stat_major_blocks_freed = 0;
+static guint64 stat_major_blocks_lazy_swept = 0;
+static guint64 stat_major_objects_evacuated = 0;
 
 #if SIZEOF_VOID_P != 8
-static long long stat_major_blocks_freed_ideal = 0;
-static long long stat_major_blocks_freed_less_ideal = 0;
-static long long stat_major_blocks_freed_individual = 0;
-static long long stat_major_blocks_alloced_less_ideal = 0;
+static guint64 stat_major_blocks_freed_ideal = 0;
+static guint64 stat_major_blocks_freed_less_ideal = 0;
+static guint64 stat_major_blocks_freed_individual = 0;
+static guint64 stat_major_blocks_alloced_less_ideal = 0;
 #endif
 
 #ifdef SGEN_COUNT_NUMBER_OF_MAJOR_OBJECTS_MARKED
-static long long num_major_objects_marked = 0;
+static guint64 num_major_objects_marked = 0;
 #define INC_NUM_MAJOR_OBJECTS_MARKED() (++num_major_objects_marked)
 #else
 #define INC_NUM_MAJOR_OBJECTS_MARKED()
 #endif
 
+#ifdef SGEN_HEAVY_BINARY_PROTOCOL
+static mono_mutex_t scanned_objects_list_lock;
+static SgenPointerQueue scanned_objects_list;
+
+static void
+add_scanned_object (void *ptr)
+{
+       if (!binary_protocol_is_enabled ())
+               return;
+
+       mono_mutex_lock (&scanned_objects_list_lock);
+       sgen_pointer_queue_add (&scanned_objects_list, ptr);
+       mono_mutex_unlock (&scanned_objects_list_lock);
+}
+#endif
+
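/*
 * Standalone sketch of the recording pattern above, with pthread_mutex_t and a
 * plain array standing in for mono_mutex_t and SgenPointerQueue (both are
 * stand-ins, not the sgen types): several GC worker threads may scan objects
 * concurrently, so every append has to happen under the lock.
 */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static void **list;
static size_t list_count, list_capacity;

static void
record_scanned (void *ptr)
{
	pthread_mutex_lock (&list_lock);
	if (list_count == list_capacity) {
		/* error handling omitted for brevity */
		list_capacity = list_capacity ? list_capacity * 2 : 1024;
		list = realloc (list, list_capacity * sizeof (void *));
	}
	list [list_count++] = ptr;
	pthread_mutex_unlock (&list_lock);
}
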
 static void
 sweep_block (MSBlockInfo *block, gboolean during_major_collection);
 
@@ -400,7 +416,6 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
        int size = block_obj_sizes [size_index];
        int count = MS_BLOCK_FREE / size;
        MSBlockInfo *info;
-       MSBlockHeader *header;
        MSBlockInfo **free_blocks = FREE_BLOCKS (pinned, has_references);
        char *obj_start;
        int i;
@@ -408,7 +423,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
        if (!sgen_memgov_try_alloc_space (MS_BLOCK_SIZE, SPACE_MAJOR))
                return FALSE;
 
-       info = sgen_alloc_internal (INTERNAL_MEM_MS_BLOCK_INFO);
+       info = (MSBlockInfo*)ms_get_empty_block ();
 
        SGEN_ASSERT (9, count >= 2, "block with %d objects, it must hold at least 2", count);
 
@@ -425,10 +440,6 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD);
        info->swept = 1;
-       info->block = ms_get_empty_block ();
-
-       header = (MSBlockHeader*) info->block;
-       header->info = info;
 #ifdef SGEN_HAVE_CONCURRENT_MARK
        info->cardtable_mod_union = NULL;
 #endif
@@ -818,30 +829,24 @@ major_dump_heap (FILE *heap_dump_file)
 
 #define MS_MARK_OBJECT_AND_ENQUEUE_CHECKED(obj,desc,block,queue) do {  \
                int __word, __bit;                                      \
-               SGEN_ASSERT (0, sgen_get_current_collection_generation () == GENERATION_OLD, "Can't majorly enqueue objects when doing minor collection"); \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
-               if (MS_OBJ_ALLOCED ((obj), (block))) { \
-                       if ((block)->has_references) {                  \
+               if (!MS_MARK_BIT ((block), __word, __bit) && MS_OBJ_ALLOCED ((obj), (block))) { \
+                       MS_SET_MARK_BIT ((block), __word, __bit);       \
+                       if (sgen_gc_descr_has_references (desc))                        \
                                GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
-                       } else {                                        \
-                               MS_SET_MARK_BIT ((block), __word, __bit); \
-                               binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
-                       }                                               \
+                       binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)
 #define MS_MARK_OBJECT_AND_ENQUEUE(obj,desc,block,queue) do {          \
                int __word, __bit;                                      \
-               SGEN_ASSERT (0, sgen_get_current_collection_generation () == GENERATION_OLD, "Can't majorly enqueue objects when doing minor collection"); \
                MS_CALC_MARK_BIT (__word, __bit, (obj));                \
-               SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj);       \
-               {               \
-                       if ((block)->has_references) {                  \
+               SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj); \
+               if (!MS_MARK_BIT ((block), __word, __bit)) {            \
+                       MS_SET_MARK_BIT ((block), __word, __bit);       \
+                       if (sgen_gc_descr_has_references (desc))                        \
                                GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
-                       } else {                                        \
-                               MS_SET_MARK_BIT ((block), __word, __bit); \
-                               binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
-                       }                                               \
+                       binary_protocol_mark ((obj), (gpointer)LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((MonoObject*)(obj))); \
                        INC_NUM_MAJOR_OBJECTS_MARKED ();                \
                }                                                       \
        } while (0)
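
/*
 * The policy of the rewritten MS_MARK_OBJECT_AND_ENQUEUE macro, spelled out as
 * a plain function for readability (a sketch that only compiles inside this
 * file, since it reuses its macros): the mark bit is now tested and set for
 * every object, references or not, and the gray queue only receives objects
 * whose descriptor actually has reference fields, so pointer-free objects are
 * never scanned.
 */
static void
mark_and_maybe_enqueue_sketch (char *obj, mword desc, MSBlockInfo *block, SgenGrayQueue *queue)
{
	int word, bit;
	MS_CALC_MARK_BIT (word, bit, obj);
	if (MS_MARK_BIT (block, word, bit))
		return;                                   /* already visited */
	MS_SET_MARK_BIT (block, word, bit);
	if (sgen_gc_descr_has_references (desc))
		GRAY_OBJECT_ENQUEUE (queue, obj, desc);   /* scan later */
	binary_protocol_mark (obj, (gpointer)LOAD_VTABLE (obj), sgen_safe_object_get_size ((MonoObject*)obj));
}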
@@ -865,7 +870,7 @@ pin_major_object (char *obj, SgenGrayQueue *queue)
 
 #ifdef SGEN_HAVE_CONCURRENT_MARK
 static void
-major_copy_or_mark_object_concurrent (void **ptr, void *obj, SgenGrayQueue *queue)
+major_copy_or_mark_object_with_evacuation_concurrent (void **ptr, void *obj, SgenGrayQueue *queue)
 {
        SGEN_ASSERT (9, sgen_concurrent_collection_in_progress (), "Why are we scanning concurrently when there's no concurrent collection on?");
        SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_is_worker_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
@@ -900,141 +905,6 @@ major_copy_or_mark_object_concurrent (void **ptr, void *obj, SgenGrayQueue *queu
 }
 #endif
 
-static void
-major_copy_or_mark_object (void **ptr, void *obj, SgenGrayQueue *queue)
-{
-       MSBlockInfo *block;
-
-       HEAVY_STAT (++stat_copy_object_called_major);
-
-       SGEN_ASSERT (9, !sgen_concurrent_collection_in_progress (), "Why are we scanning non-concurrently when there's a concurrent collection on?");
-
-       SGEN_ASSERT (9, obj, "null object from pointer %p", ptr);
-       SGEN_ASSERT (9, current_collection_generation == GENERATION_OLD, "old gen parallel allocator called from a %d collection", current_collection_generation);
-
-       if (sgen_ptr_in_nursery (obj)) {
-               int word, bit;
-               char *forwarded, *old_obj;
-
-               if ((forwarded = SGEN_OBJECT_IS_FORWARDED (obj))) {
-                       SGEN_UPDATE_REFERENCE (ptr, forwarded);
-                       return;
-               }
-               if (SGEN_OBJECT_IS_PINNED (obj))
-                       return;
-
-               /* An object in the nursery To Space has already been copied and grayed. Nothing to do. */
-               if (sgen_nursery_is_to_space (obj))
-                       return;
-
-               HEAVY_STAT (++stat_objects_copied_major);
-
-       do_copy_object:
-               old_obj = obj;
-               obj = copy_object_no_checks (obj, queue);
-               if (G_UNLIKELY (old_obj == obj)) {
-                       /*If we fail to evacuate an object we just stop doing it for a given block size as all other will surely fail too.*/
-                       if (!sgen_ptr_in_nursery (obj)) {
-                               int size_index;
-                               block = MS_BLOCK_FOR_OBJ (obj);
-                               size_index = block->obj_size_index;
-                               evacuate_block_obj_sizes [size_index] = FALSE;
-                               MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
-                       }
-                       return;
-               }
-               SGEN_UPDATE_REFERENCE (ptr, obj);
-
-               /*
-                * FIXME: See comment for copy_object_no_checks().  If
-                * we have that, we can let the allocation function
-                * give us the block info, too, and we won't have to
-                * re-fetch it.
-                *
-                * FIXME (2): We should rework this to avoid all those nursery checks.
-                */
-               /*
-                * For the split nursery allocator the object might
-                * still be in the nursery despite having being
-                * promoted, in which case we can't mark it.
-                */
-               if (!sgen_ptr_in_nursery (obj)) {
-                       block = MS_BLOCK_FOR_OBJ (obj);
-                       MS_CALC_MARK_BIT (word, bit, obj);
-                       SGEN_ASSERT (9, !MS_MARK_BIT (block, word, bit), "object %p already marked", obj);
-                       if (!SGEN_VTABLE_HAS_REFERENCES (LOAD_VTABLE (obj))) {
-                               MS_SET_MARK_BIT (block, word, bit);
-                               binary_protocol_mark (obj, (gpointer)LOAD_VTABLE (obj), sgen_safe_object_get_size ((MonoObject*)obj));
-                       }
-               }
-       } else {
-               char *forwarded;
-               mword objsize;
-
-               /*
-                * If we have don't have a fixed heap we cannot know
-                * whether an object is in the LOS or in the small
-                * object major heap without checking its size.  To do
-                * that, however, we need to know that we actually
-                * have a valid object, not a forwarding pointer, so
-                * we have to do this check first.
-                */
-               if ((forwarded = SGEN_OBJECT_IS_FORWARDED (obj))) {
-                       SGEN_UPDATE_REFERENCE (ptr, forwarded);
-                       return;
-               }
-
-               objsize = SGEN_ALIGN_UP (sgen_safe_object_get_size ((MonoObject*)obj));
-
-               if (objsize <= SGEN_MAX_SMALL_OBJ_SIZE) {
-                       int size_index;
-                       gboolean evacuate;
-
-                       block = MS_BLOCK_FOR_OBJ (obj);
-                       size_index = block->obj_size_index;
-                       evacuate = evacuate_block_obj_sizes [size_index];
-
-                       if (evacuate && !block->has_pinned) {
-                               g_assert (!SGEN_OBJECT_IS_PINNED (obj));
-                               if (block->is_to_space)
-                                       return;
-                               HEAVY_STAT (++stat_major_objects_evacuated);
-                               goto do_copy_object;
-                       } else {
-                               MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
-                       }
-               } else {
-                       if (sgen_los_object_is_pinned (obj))
-                               return;
-                       binary_protocol_pin (obj, (gpointer)SGEN_LOAD_VTABLE (obj), sgen_safe_object_get_size ((MonoObject*)obj));
-
-#ifdef ENABLE_DTRACE
-                       if (G_UNLIKELY (MONO_GC_OBJ_PINNED_ENABLED ())) {
-                               MonoVTable *vt = (MonoVTable*)SGEN_LOAD_VTABLE (obj);
-                               MONO_GC_OBJ_PINNED ((mword)obj, sgen_safe_object_get_size (obj), vt->klass->name_space, vt->klass->name, GENERATION_OLD);
-                       }
-#endif
-
-                       if (SGEN_OBJECT_HAS_REFERENCES (obj))
-                               GRAY_OBJECT_ENQUEUE (queue, obj, sgen_obj_get_descriptor (obj));
-               }
-       }
-}
-
-static void
-major_copy_or_mark_object_canonical (void **ptr, SgenGrayQueue *queue)
-{
-       major_copy_or_mark_object (ptr, *ptr, queue);
-}
-
-#ifdef SGEN_HAVE_CONCURRENT_MARK
-static void
-major_copy_or_mark_object_concurrent_canonical (void **ptr, SgenGrayQueue *queue)
-{
-       major_copy_or_mark_object_concurrent (ptr, *ptr, queue);
-}
-#endif
-
 static long long
 major_get_and_reset_num_major_objects_marked (void)
 {
@@ -1047,170 +917,71 @@ major_get_and_reset_num_major_objects_marked (void)
 #endif
 }
 
-#include "sgen-major-scan-object.h"
-
-#ifdef SGEN_HAVE_CONCURRENT_MARK
-#define SCAN_FOR_CONCURRENT_MARK
-#include "sgen-major-scan-object.h"
-#undef SCAN_FOR_CONCURRENT_MARK
-#endif
-
-#if !defined (FIXED_HEAP) && !defined (SGEN_PARALLEL_MARK)
-//#define USE_PREFETCH_QUEUE
-
 #ifdef HEAVY_STATISTICS
-static long long stat_optimized_copy_object_called;
-static long long stat_optimized_nursery;
-static long long stat_optimized_nursery_forwarded;
-static long long stat_optimized_nursery_pinned;
-static long long stat_optimized_nursery_not_copied;
-static long long stat_optimized_nursery_regular;
-static long long stat_optimized_major;
-static long long stat_optimized_major_forwarded;
-static long long stat_optimized_major_small_fast;
-static long long stat_optimized_major_small_slow;
-static long long stat_optimized_major_large;
-
-static long long stat_drain_prefetch_fills;
-static long long stat_drain_prefetch_fill_failures;
-static long long stat_drain_loops;
+static guint64 stat_optimized_copy;
+static guint64 stat_optimized_copy_nursery;
+static guint64 stat_optimized_copy_nursery_forwarded;
+static guint64 stat_optimized_copy_nursery_pinned;
+static guint64 stat_optimized_copy_major;
+static guint64 stat_optimized_copy_major_small_fast;
+static guint64 stat_optimized_copy_major_small_slow;
+static guint64 stat_optimized_copy_major_large;
+static guint64 stat_optimized_copy_major_forwarded;
+static guint64 stat_optimized_copy_major_small_evacuate;
+static guint64 stat_optimized_major_scan;
+static guint64 stat_optimized_major_scan_no_refs;
+
+static guint64 stat_drain_prefetch_fills;
+static guint64 stat_drain_prefetch_fill_failures;
+static guint64 stat_drain_loops;
 #endif
 
-/* Returns whether the object is still in the nursery. */
-static gboolean
-optimized_copy_or_mark_object (void **ptr, void *obj, SgenGrayQueue *queue)
-{
-       HEAVY_STAT (++stat_optimized_copy_object_called);
-
-       SGEN_ASSERT (9, obj, "null object from pointer %p", ptr);
-       SGEN_ASSERT (9, current_collection_generation == GENERATION_OLD, "old gen parallel allocator called from a %d collection", current_collection_generation);
-
-       if (sgen_ptr_in_nursery (obj)) {
-               mword vtable_word = *(mword*)obj;
-               char *forwarded, *old_obj;
-
-               HEAVY_STAT (++stat_optimized_nursery);
-
-               if (SGEN_VTABLE_IS_PINNED (vtable_word)) {
-                       HEAVY_STAT (++stat_optimized_nursery_pinned);
-                       return TRUE;
-               }
-               if ((forwarded = SGEN_VTABLE_IS_FORWARDED (vtable_word))) {
-                       HEAVY_STAT (++stat_optimized_nursery_forwarded);
-                       *ptr = forwarded;
-                       return FALSE;
-               }
-
-               HEAVY_STAT (++stat_objects_copied_major);
-
-               old_obj = obj;
-               obj = copy_object_no_checks (obj, queue);
-
-               SGEN_ASSERT (0, old_obj != obj, "Cannot handle copy object failure.");
+static void major_scan_object_with_evacuation (char *start, mword desc, SgenGrayQueue *queue);
 
-               *ptr = obj;
+#define COPY_OR_MARK_FUNCTION_NAME     major_copy_or_mark_object_no_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME      major_scan_object_no_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_no_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
 
-               HEAVY_STAT (++stat_optimized_nursery_regular);
-
-               return FALSE;
-       } else {
-               GRAY_OBJECT_ENQUEUE (queue, obj, 0);
-       }
-       return FALSE;
-}
+#define COPY_OR_MARK_WITH_EVACUATION
+#define COPY_OR_MARK_FUNCTION_NAME     major_copy_or_mark_object_with_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME      major_scan_object_with_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_with_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
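
/*
 * Schematic of the "template by #include" trick used above (this body is an
 * assumption about sgen-marksweep-drain-gray-stack.h, not its actual
 * contents): the header defines one copy/scan/drain triple whose names and
 * evacuation behaviour are chosen by the macros set right before the
 * #include, then #undefs them so it can be instantiated twice.
 *
 *     static gboolean
 *     DRAIN_GRAY_STACK_FUNCTION_NAME (ScanCopyContext ctx)
 *     {
 *             SgenGrayQueue *queue = ctx.queue;
 *             for (;;) {
 *                     char *obj;
 *                     mword desc;
 *                     GRAY_OBJECT_DEQUEUE (queue, &obj, &desc);
 *                     if (!obj)
 *                             return TRUE;
 *                     SCAN_OBJECT_FUNCTION_NAME (obj, desc, queue);
 *             }
 *     }
 *
 *     #undef COPY_OR_MARK_FUNCTION_NAME
 *     #undef SCAN_OBJECT_FUNCTION_NAME
 *     #undef DRAIN_GRAY_STACK_FUNCTION_NAME
 *     #undef COPY_OR_MARK_WITH_EVACUATION
 */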
 
 static gboolean
 drain_gray_stack (ScanCopyContext ctx)
 {
-       SgenGrayQueue *queue = ctx.queue;
-
-       SGEN_ASSERT (0, ctx.scan_func == major_scan_object, "Wrong scan function");
-
-#ifdef USE_PREFETCH_QUEUE
-       HEAVY_STAT (++stat_drain_prefetch_fills);
-       if (!sgen_gray_object_fill_prefetch (queue)) {
-               HEAVY_STAT (++stat_drain_prefetch_fill_failures);
-               return TRUE;
-       }
-#endif
-
-       for (;;) {
-               char *obj;
-               mword desc;
-               int type;
-
-               HEAVY_STAT (++stat_drain_loops);
-
-#ifdef USE_PREFETCH_QUEUE
-               sgen_gray_object_dequeue_fast (queue, &obj, &desc);
-               if (!obj) {
-                       HEAVY_STAT (++stat_drain_prefetch_fills);
-                       if (!sgen_gray_object_fill_prefetch (queue)) {
-                               HEAVY_STAT (++stat_drain_prefetch_fill_failures);
-                               return TRUE;
-                       }
-                       continue;
+       gboolean evacuation = FALSE;
+       int i;
+       for (i = 0; i < num_block_obj_sizes; ++i) {
+               if (evacuate_block_obj_sizes [i]) {
+                       evacuation = TRUE;
+                       break;
                }
-#else
-               GRAY_OBJECT_DEQUEUE (queue, &obj, &desc);
-               if (!obj)
-                       return TRUE;
-#endif
-
-               desc = sgen_obj_get_descriptor_safe (obj);
-               type = desc & 7;
-
-               HEAVY_STAT (++stat_optimized_major);
-
-               /* Mark object or, if already marked, don't process. */
-               if (!sgen_ptr_in_nursery (obj)) {
-                       if (type == DESC_TYPE_SMALL_BITMAP || SGEN_ALIGN_UP (sgen_safe_object_get_size ((MonoObject*)obj)) <= SGEN_MAX_SMALL_OBJ_SIZE) {
-                               MSBlockInfo *block = MS_BLOCK_FOR_OBJ (obj);
-                               int __word, __bit;
-
-                               HEAVY_STAT (++stat_optimized_major_small_fast);
-
-                               MS_CALC_MARK_BIT (__word, __bit, (obj));
-                               if (MS_MARK_BIT ((block), __word, __bit))
-                                       continue;
-                               MS_SET_MARK_BIT ((block), __word, __bit);
-                       } else {
-                               HEAVY_STAT (++stat_optimized_major_large);
+       }
 
-                               if (sgen_los_object_is_pinned (obj))
-                                       continue;
-                               sgen_los_pin_object (obj);
-                       }
-               }
+       if (evacuation)
+               return drain_gray_stack_with_evacuation (ctx);
+       else
+               return drain_gray_stack_no_evacuation (ctx);
+}
 
-               /* Now scan the object. */
-#ifdef HEAVY_STATISTICS
-               sgen_descriptor_count_scanned_object (desc);
+#ifdef SGEN_HAVE_CONCURRENT_MARK
+#include "sgen-marksweep-scan-object-concurrent.h"
 #endif
-               if (type == DESC_TYPE_SMALL_BITMAP) {
-                       void **_objptr = (void**)(obj);
-                       gsize _bmap = (desc) >> 16;
-                       _objptr += OBJECT_HEADER_WORDS;
-                       do {
-                               int _index = GNUC_BUILTIN_CTZ (_bmap);
-                               _objptr += _index;
-                               _bmap >>= (_index + 1);
-
-                               void *__old = *(_objptr);
-                               if (__old) {
-                                       gboolean still_in_nursery = optimized_copy_or_mark_object (_objptr, __old, queue);
-                                       if (G_UNLIKELY (still_in_nursery && !sgen_ptr_in_nursery ((_objptr)))) {
-                                               void *__copy = *(_objptr);
-                                               sgen_add_to_global_remset ((_objptr), __copy);
-                                       }
-                               }
 
-                               _objptr ++;
-                       } while (_bmap);
-               } else {
-                       major_scan_object_no_mark (obj, desc, queue);
-               }
-       }
+static void
+major_copy_or_mark_object_canonical (void **ptr, SgenGrayQueue *queue)
+{
+       major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
+}
+
+#ifdef SGEN_HAVE_CONCURRENT_MARK
+static void
+major_copy_or_mark_object_concurrent_canonical (void **ptr, SgenGrayQueue *queue)
+{
+       major_copy_or_mark_object_with_evacuation_concurrent (ptr, *ptr, queue);
 }
 #endif
 
@@ -1431,8 +1202,7 @@ ms_sweep (void)
                        DELETE_BLOCK_IN_FOREACH ();
 
                        binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
-                       ms_free_block (block->block);
-                       sgen_free_internal (block, INTERNAL_MEM_MS_BLOCK_INFO);
+                       ms_free_block (block);
 
                        --num_major_sections;
                }
@@ -1442,7 +1212,6 @@ ms_sweep (void)
        for (i = 0; i < num_block_obj_sizes; ++i) {
                float usage = (float)slots_used [i] / (float)slots_available [i];
                if (num_blocks [i] > 5 && usage < evacuation_threshold) {
-                       g_assert_not_reached ();
                        evacuate_block_obj_sizes [i] = TRUE;
                        /*
                        g_print ("slot size %d - %d of %d used\n",
@@ -1597,8 +1366,18 @@ major_start_major_collection (void)
 }
 
 static void
-major_finish_major_collection (void)
+major_finish_major_collection (ScannedObjectCounts *counts)
 {
+#ifdef SGEN_HEAVY_BINARY_PROTOCOL
+       if (binary_protocol_is_enabled ()) {
+               counts->num_scanned_objects = scanned_objects_list.next_slot;
+
+               sgen_pointer_queue_sort_uniq (&scanned_objects_list);
+               counts->num_unique_scanned_objects = scanned_objects_list.next_slot;
+
+               sgen_pointer_queue_clear (&scanned_objects_list);
+       }
+#endif
 }
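
/*
 * Standalone sketch of the counting done above, with qsort over a plain array
 * standing in for sgen_pointer_queue_sort_uniq (a stand-in, not the sgen
 * implementation): next_slot before the sort is the total number of scan
 * operations, and after sorting and deduplicating it is the number of
 * distinct objects, so the difference shows how much rescanning happened.
 */
#include <stdio.h>
#include <stdlib.h>

static int
ptr_cmp (const void *a, const void *b)
{
	const void *pa = *(const void * const *)a, *pb = *(const void * const *)b;
	return (pa > pb) - (pa < pb);
}

static size_t
sort_uniq (void **ptrs, size_t n)
{
	size_t i, out = 0;
	qsort (ptrs, n, sizeof (void *), ptr_cmp);
	for (i = 0; i < n; ++i)
		if (out == 0 || ptrs [i] != ptrs [out - 1])
			ptrs [out++] = ptrs [i];
	return out;
}

int
main (void)
{
	int a, b;
	void *scanned [] = { &a, &b, &a, &a, &b };     /* 5 scans, 2 objects */
	size_t total = 5, unique = sort_uniq (scanned, total);
	printf ("scanned %zu, unique %zu\n", total, unique);
	return 0;
}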
 
 #if SIZEOF_VOID_P != 8
@@ -1825,7 +1604,6 @@ get_num_major_sections (void)
 static gboolean
 major_handle_gc_param (const char *opt)
 {
-       /*
        if (g_str_has_prefix (opt, "evacuation-threshold=")) {
                const char *arg = strchr (opt, '=') + 1;
                int percentage = atoi (arg);
@@ -1835,8 +1613,7 @@ major_handle_gc_param (const char *opt)
                }
                evacuation_threshold = (float)percentage / 100.0f;
                return TRUE;
-               } else */
-       if (!strcmp (opt, "lazy-sweep")) {
+       } else if (!strcmp (opt, "lazy-sweep")) {
                lazy_sweep = TRUE;
                return TRUE;
        } else if (!strcmp (opt, "no-lazy-sweep")) {
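
/*
 * With the threshold option un-commented, it can be set from the environment
 * again, e.g. (illustrative invocation, the exact collector name depends on
 * the configuration):
 *
 *     MONO_GC_PARAMS=major=marksweep,evacuation-threshold=66
 *
 * which yields evacuation_threshold = 0.66 via the percentage code above.
 */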
@@ -1870,10 +1647,10 @@ major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
 }
 
 #ifdef HEAVY_STATISTICS
-extern long long marked_cards;
-extern long long scanned_cards;
-extern long long scanned_objects;
-extern long long remarked_cards;
+extern guint64 marked_cards;
+extern guint64 scanned_cards;
+extern guint64 scanned_objects;
+extern guint64 remarked_cards;
 #endif
 
 #define CARD_WORDS_PER_BLOCK (CARDS_PER_BLOCK / SIZEOF_VOID_P)
@@ -1983,6 +1760,8 @@ major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
                        end = block_start + MS_BLOCK_SIZE;
                        base = sgen_card_table_align_pointer (obj);
 
+                       cards += MS_BLOCK_SKIP >> CARD_BITS;
+
                        while (obj < end) {
                                size_t card_offset;
 
@@ -2037,6 +1816,8 @@ major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
                        }
                        card_data_end = card_data + CARDS_PER_BLOCK;
 
+                       card_data += MS_BLOCK_SKIP >> CARD_BITS;
+
                        for (card_data = initial_skip_card (card_data); card_data < card_data_end; ++card_data) { //card_data = skip_card (card_data + 1, card_data_end)) {
                                size_t index;
                                size_t idx = card_data - card_base;
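
/*
 * Why the header cards are skipped (standalone arithmetic; the 512-byte card
 * size and 64-bit word size are assumptions): with the MSBlockInfo embedded in
 * the block, MS_BLOCK_SKIP can exceed a card.  For a 64 KiB block the mark
 * bitmap alone is 64*1024/8/64 = 128 mwords = 1 KiB, so the first two cards
 * cover only header data and must not be scanned as objects; for the default
 * 16 KiB block the header stays below one card and the adjustment is a no-op.
 */
#include <stdio.h>

int
main (void)
{
	const unsigned card_bits = 9;                  /* 512-byte cards */
	const unsigned block_skip = 1152;              /* assumed MS_BLOCK_SKIP */
	printf ("cards to skip: %u\n", block_skip >> card_bits);   /* 2 */
	return 0;
}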
@@ -2187,15 +1968,15 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i)
                g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i));
 
-       mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_alloced);
-       mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed);
-       mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_lazy_swept);
-       mono_counters_register ("# major objects evacuated", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_objects_evacuated);
+       mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced);
+       mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed);
+       mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept);
+       mono_counters_register ("# major objects evacuated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_objects_evacuated);
 #if SIZEOF_VOID_P != 8
-       mono_counters_register ("# major blocks freed ideally", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed_ideal);
-       mono_counters_register ("# major blocks freed less ideally", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed_less_ideal);
-       mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_freed_individual);
-       mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_major_blocks_alloced_less_ideal);
+       mono_counters_register ("# major blocks freed ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_ideal);
+       mono_counters_register ("# major blocks freed less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_less_ideal);
+       mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual);
+       mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal);
 #endif
 
        collector->section_size = MAJOR_SECTION_SIZE;
@@ -2258,11 +2039,11 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->count_cards = major_count_cards;
 
        collector->major_ops.copy_or_mark_object = major_copy_or_mark_object_canonical;
-       collector->major_ops.scan_object = major_scan_object;
+       collector->major_ops.scan_object = major_scan_object_with_evacuation;
 #ifdef SGEN_HAVE_CONCURRENT_MARK
        if (is_concurrent) {
                collector->major_concurrent_ops.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical;
-               collector->major_concurrent_ops.scan_object = major_scan_object_concurrent;
+               collector->major_concurrent_ops.scan_object = major_scan_object_no_mark_concurrent;
                collector->major_concurrent_ops.scan_vtype = major_scan_vtype_concurrent;
        }
 #endif
@@ -2273,25 +2054,29 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
                collector->drain_gray_stack = drain_gray_stack;
 
 #ifdef HEAVY_STATISTICS
-       mono_counters_register ("Optimized copy object called", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_copy_object_called);
-       mono_counters_register ("Optimized nursery", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_nursery);
-       mono_counters_register ("Optimized nursery forwarded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_nursery_forwarded);
-       mono_counters_register ("Optimized nursery pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_nursery_pinned);
-       mono_counters_register ("Optimized nursery not copied", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_nursery_not_copied);
-       mono_counters_register ("Optimized nursery regular", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_nursery_regular);
-       mono_counters_register ("Optimized major", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major);
-       mono_counters_register ("Optimized major forwarded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_forwarded);
-       mono_counters_register ("Optimized major small fast", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_small_fast);
-       mono_counters_register ("Optimized major small slow", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_small_slow);
-       mono_counters_register ("Optimized major large", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_optimized_major_large);
-
-       mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_loops);
-       mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_prefetch_fills);
-       mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_drain_prefetch_fill_failures);
+       mono_counters_register ("Optimized copy", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy);
+       mono_counters_register ("Optimized copy nursery", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery);
+       mono_counters_register ("Optimized copy nursery forwarded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_forwarded);
+       mono_counters_register ("Optimized copy nursery pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_nursery_pinned);
+       mono_counters_register ("Optimized copy major", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major);
+       mono_counters_register ("Optimized copy major small fast", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_fast);
+       mono_counters_register ("Optimized copy major small slow", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_small_slow);
+       mono_counters_register ("Optimized copy major large", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_copy_major_large);
+       mono_counters_register ("Optimized major scan", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan);
+       mono_counters_register ("Optimized major scan no refs", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_optimized_major_scan_no_refs);
+
+       mono_counters_register ("Gray stack drain loops", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_loops);
+       mono_counters_register ("Gray stack prefetch fills", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fills);
+       mono_counters_register ("Gray stack prefetch failures", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_drain_prefetch_fill_failures);
+#endif
 #endif
 
+#ifdef SGEN_HEAVY_BINARY_PROTOCOL
+       mono_mutex_init (&scanned_objects_list_lock);
 #endif
 
+       SGEN_ASSERT (0, SGEN_MAX_SMALL_OBJ_SIZE <= MS_BLOCK_FREE / 2, "MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2");
+
        /*cardtable requires major pages to be 8 cards aligned*/
        g_assert ((MS_BLOCK_SIZE % (8 * CARD_SIZE_IN_BYTES)) == 0);
 }
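
/*
 * Sanity arithmetic for the two checks above (the 8000-byte
 * SGEN_MAX_SMALL_OBJ_SIZE, the ~304-byte and ~1152-byte skips, and the
 * 512-byte cards are assumptions about the usual sgen configuration).
 */
#include <assert.h>

int
main (void)
{
	const unsigned max_small_obj = 8000, card_size = 512;

	/* default 16 KiB block: (16384 - 304) / 2 = 8040 >= 8000 */
	assert (max_small_obj <= (16 * 1024 - 304) / 2);
	assert ((16 * 1024) % (8 * card_size) == 0);

	/* a 64 KiB block (larger embedded header) keeps both properties */
	assert (max_small_obj <= (64 * 1024 - 1152) / 2);
	assert ((64 * 1024) % (8 * card_size) == 0);
	return 0;
}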