Merge pull request #1588 from BrzVlad/feature-aot-wbarrier
[mono.git] / mono / metadata / sgen-gc.c
index 5c6881be5272dafdc31b7c0eb09ba6ca0bc337fd..532e1f74aa5287eba0508c99748a59418f28face 100644 (file)
 #endif
 #include <stdio.h>
 #include <string.h>
-#include <signal.h>
 #include <errno.h>
 #include <assert.h>
 
@@ -279,6 +278,7 @@ static gboolean conservative_stack_mark = FALSE;
 /* If set, do a plausibility check on the scan_starts before and after
    each collection */
 static gboolean do_scan_starts_check = FALSE;
+
 /*
  * If the major collector is concurrent and this is FALSE, we will
  * never initiate a synchronous major collection, unless requested via
@@ -293,23 +293,23 @@ static gboolean do_dump_nursery_content = FALSE;
 static gboolean enable_nursery_canaries = FALSE;
 
 #ifdef HEAVY_STATISTICS
-long long stat_objects_alloced_degraded = 0;
-long long stat_bytes_alloced_degraded = 0;
+guint64 stat_objects_alloced_degraded = 0;
+guint64 stat_bytes_alloced_degraded = 0;
 
-long long stat_copy_object_called_nursery = 0;
-long long stat_objects_copied_nursery = 0;
-long long stat_copy_object_called_major = 0;
-long long stat_objects_copied_major = 0;
+guint64 stat_copy_object_called_nursery = 0;
+guint64 stat_objects_copied_nursery = 0;
+guint64 stat_copy_object_called_major = 0;
+guint64 stat_objects_copied_major = 0;
 
-long long stat_scan_object_called_nursery = 0;
-long long stat_scan_object_called_major = 0;
+guint64 stat_scan_object_called_nursery = 0;
+guint64 stat_scan_object_called_major = 0;
 
-long long stat_slots_allocated_in_vain;
+guint64 stat_slots_allocated_in_vain;
 
-long long stat_nursery_copy_object_failed_from_space = 0;
-long long stat_nursery_copy_object_failed_forwarded = 0;
-long long stat_nursery_copy_object_failed_pinned = 0;
-long long stat_nursery_copy_object_failed_to_space = 0;
+guint64 stat_nursery_copy_object_failed_from_space = 0;
+guint64 stat_nursery_copy_object_failed_forwarded = 0;
+guint64 stat_nursery_copy_object_failed_pinned = 0;
+guint64 stat_nursery_copy_object_failed_to_space = 0;
 
 static int stat_wbarrier_add_to_global_remset = 0;
 static int stat_wbarrier_set_field = 0;
@@ -322,32 +322,32 @@ static int stat_wbarrier_value_copy = 0;
 static int stat_wbarrier_object_copy = 0;
 #endif
 
-static long long stat_pinned_objects = 0;
-
-static long long time_minor_pre_collection_fragment_clear = 0;
-static long long time_minor_pinning = 0;
-static long long time_minor_scan_remsets = 0;
-static long long time_minor_scan_pinned = 0;
-static long long time_minor_scan_registered_roots = 0;
-static long long time_minor_scan_thread_data = 0;
-static long long time_minor_finish_gray_stack = 0;
-static long long time_minor_fragment_creation = 0;
-
-static long long time_major_pre_collection_fragment_clear = 0;
-static long long time_major_pinning = 0;
-static long long time_major_scan_pinned = 0;
-static long long time_major_scan_registered_roots = 0;
-static long long time_major_scan_thread_data = 0;
-static long long time_major_scan_alloc_pinned = 0;
-static long long time_major_scan_finalized = 0;
-static long long time_major_scan_big_objects = 0;
-static long long time_major_finish_gray_stack = 0;
-static long long time_major_free_bigobjs = 0;
-static long long time_major_los_sweep = 0;
-static long long time_major_sweep = 0;
-static long long time_major_fragment_creation = 0;
-
-static long long time_max = 0;
+static guint64 stat_pinned_objects = 0;
+
+static guint64 time_minor_pre_collection_fragment_clear = 0;
+static guint64 time_minor_pinning = 0;
+static guint64 time_minor_scan_remsets = 0;
+static guint64 time_minor_scan_pinned = 0;
+static guint64 time_minor_scan_registered_roots = 0;
+static guint64 time_minor_scan_thread_data = 0;
+static guint64 time_minor_finish_gray_stack = 0;
+static guint64 time_minor_fragment_creation = 0;
+
+static guint64 time_major_pre_collection_fragment_clear = 0;
+static guint64 time_major_pinning = 0;
+static guint64 time_major_scan_pinned = 0;
+static guint64 time_major_scan_registered_roots = 0;
+static guint64 time_major_scan_thread_data = 0;
+static guint64 time_major_scan_alloc_pinned = 0;
+static guint64 time_major_scan_finalized = 0;
+static guint64 time_major_scan_big_objects = 0;
+static guint64 time_major_finish_gray_stack = 0;
+static guint64 time_major_free_bigobjs = 0;
+static guint64 time_major_los_sweep = 0;
+static guint64 time_major_sweep = 0;
+static guint64 time_major_fragment_creation = 0;
+
+static guint64 time_max = 0;
 
 static SGEN_TV_DECLARE (time_major_conc_collection_start);
 static SGEN_TV_DECLARE (time_major_conc_collection_end);
@@ -537,15 +537,6 @@ static mword objects_pinned;
  * ######################################################################
  */
 
-inline static void*
-align_pointer (void *ptr)
-{
-       mword p = (mword)ptr;
-       p += sizeof (gpointer) - 1;
-       p &= ~ (sizeof (gpointer) - 1);
-       return (void*)p;
-}
-
 typedef SgenGrayQueue GrayQueue;
 
 /* forward declarations */
@@ -567,14 +558,12 @@ static int mark_ephemerons_in_range (ScanCopyContext ctx);
 static void clear_unreachable_ephemerons (ScanCopyContext ctx);
 static void null_ephemerons_for_domain (MonoDomain *domain);
 
-static gboolean major_update_or_finish_concurrent_collection (gboolean force_finish);
-
 SgenObjectOperations current_object_ops;
 SgenMajorCollector major_collector;
 SgenMinorCollector sgen_minor_collector;
 static GrayQueue gray_queue;
 
-static SgenRemeberedSet remset;
+static SgenRememberedSet remset;
 
 /* The gray queue to use from the main collection thread. */
 #define WORKERS_DISTRIBUTE_GRAY_QUEUE  (&gray_queue)
@@ -901,9 +890,6 @@ sgen_drain_gray_stack (int max_objs, ScanCopyContext ctx)
                        if (!obj)
                                return TRUE;
                        SGEN_LOG (9, "Precise gray object scan %p (%s)", obj, safe_name (obj));
-#ifndef SGEN_GRAY_QUEUE_HAVE_DESCRIPTORS
-                       desc = sgen_obj_get_descriptor_safe (obj);
-#endif
                        scan_func (obj, desc, queue);
                }
        } while (max_objs < 0);
@@ -991,7 +977,7 @@ pin_objects_from_nursery_pin_queue (ScanCopyContext ctx)
                 * search_start must point to zeroed mem or point to an object.
                 */
                do {
-                       size_t obj_size;
+                       size_t obj_size, canarified_obj_size;
 
                        /* Skip zeros. */
                        if (!*(void**)search_start) {
@@ -1000,22 +986,27 @@ pin_objects_from_nursery_pin_queue (ScanCopyContext ctx)
                                continue;
                        }
 
-                       obj_size = ALIGN_UP (safe_object_get_size ((MonoObject*)search_start));
-
-                       if (addr >= search_start && (char*)addr < (char*)search_start + obj_size) {
-                               /* This is the object we're looking for. */
-                               obj_to_pin = search_start;
-                               obj_to_pin_size = obj_size;
-                               break;
-                       }
+                       canarified_obj_size = obj_size = ALIGN_UP (safe_object_get_size ((MonoObject*)search_start));
 
-                       /* Skip to the next object */
+                       /*
+                        * Filler arrays are marked by an invalid sync word.  We don't
+                        * consider them for pinning.  They are not delimited by canaries,
+                        * either.
+                        */
                        if (((MonoObject*)search_start)->synchronisation != GINT_TO_POINTER (-1)) {
                                CHECK_CANARY_FOR_OBJECT (search_start);
-                               CANARIFY_SIZE (obj_size);
-                               CANARIFY_SIZE (obj_to_pin_size);
+                               CANARIFY_SIZE (canarified_obj_size);
+
+                               if (addr >= search_start && (char*)addr < (char*)search_start + obj_size) {
+                                       /* This is the object we're looking for. */
+                                       obj_to_pin = search_start;
+                                       obj_to_pin_size = canarified_obj_size;
+                                       break;
+                               }
                        }
-                       search_start = (void*)((char*)search_start + obj_size);
+
+                       /* Skip to the next object */
+                       search_start = (void*)((char*)search_start + canarified_obj_size);
                } while (search_start <= addr);
 
                /* We've searched past the address we were looking for. */
@@ -1096,18 +1087,27 @@ pin_objects_in_nursery (ScanCopyContext ctx)
        nursery_section->pin_queue_last_entry = nursery_section->pin_queue_first_entry + reduced_to;
 }
 
-
+/*
+ * This function is only ever called (via `collector_pin_object()` in `sgen-copy-object.h`)
+ * when we can't promote an object because we're out of memory.
+ */
 void
 sgen_pin_object (void *object, GrayQueue *queue)
 {
-       SGEN_PIN_OBJECT (object);
+       /*
+        * All pinned objects are assumed to have been staged, so we need to stage as well.
+        * Also, the count of staged objects shows that "late pinning" happened.
+        */
        sgen_pin_stage_ptr (object);
+
+       SGEN_PIN_OBJECT (object);
+       binary_protocol_pin (object, (gpointer)LOAD_VTABLE (object), safe_object_get_size (object));
+
        ++objects_pinned;
        if (G_UNLIKELY (do_pin_stats))
                sgen_pin_stats_register_object (object, safe_object_get_size (object));
 
        GRAY_OBJECT_ENQUEUE (queue, object, sgen_obj_get_descriptor_safe (object));
-       binary_protocol_pin (object, (gpointer)LOAD_VTABLE (object), safe_object_get_size (object));
 
 #ifdef ENABLE_DTRACE
        if (G_UNLIKELY (MONO_GC_OBJ_PINNED_ENABLED ())) {
@@ -1118,36 +1118,6 @@ sgen_pin_object (void *object, GrayQueue *queue)
 #endif
 }
 
-void
-sgen_parallel_pin_or_update (void **ptr, void *obj, MonoVTable *vt, SgenGrayQueue *queue)
-{
-       for (;;) {
-               mword vtable_word;
-               gboolean major_pinned = FALSE;
-
-               if (sgen_ptr_in_nursery (obj)) {
-                       if (SGEN_CAS_PTR (obj, SGEN_POINTER_TAG_PINNED (vt), vt) == vt) {
-                               sgen_pin_object (obj, queue);
-                               break;
-                       }
-               } else {
-                       major_collector.pin_major_object (obj, queue);
-                       major_pinned = TRUE;
-               }
-
-               vtable_word = *(mword*)obj;
-               /*someone else forwarded it, update the pointer and bail out*/
-               if (SGEN_POINTER_IS_TAGGED_FORWARDED (vtable_word)) {
-                       *ptr = SGEN_POINTER_UNTAG_VTABLE (vtable_word);
-                       break;
-               }
-
-               /*someone pinned it, nothing to do.*/
-               if (SGEN_POINTER_IS_TAGGED_PINNED (vtable_word) || major_pinned)
-                       break;
-       }
-}
-
 /* Sort the addresses in array in increasing order.
  * Done using a by-the-book heap sort, which has decent and stable performance and is pretty cache efficient.
  */
@@ -1415,11 +1385,7 @@ alloc_nursery (void)
        /* If there isn't enough space even for the nursery we should simply abort. */
        g_assert (sgen_memgov_try_alloc_space (alloc_size, SPACE_NURSERY));
 
-#ifdef SGEN_ALIGN_NURSERY
        data = major_collector.alloc_heap (alloc_size, alloc_size, DEFAULT_NURSERY_BITS);
-#else
-       data = major_collector.alloc_heap (alloc_size, 0, DEFAULT_NURSERY_BITS);
-#endif
        sgen_update_heap_boundaries ((mword)data, (mword)(data + sgen_nursery_size));
        SGEN_LOG (4, "Expanding nursery size (%p-%p): %lu, total: %lu", data, data + alloc_size, (unsigned long)sgen_nursery_size, (unsigned long)mono_gc_get_heap_size ());
        section->data = section->next_data = data;
@@ -1438,11 +1404,7 @@ void*
 mono_gc_get_nursery (int *shift_bits, size_t *size)
 {
        *size = sgen_nursery_size;
-#ifdef SGEN_ALIGN_NURSERY
        *shift_bits = DEFAULT_NURSERY_BITS;
-#else
-       *shift_bits = -1;
-#endif
        return sgen_get_nursery_start ();
 }
 
@@ -1801,13 +1763,13 @@ sgen_dump_section (GCMemSection *section, const char *type)
        char *end = section->data + section->size;
        char *occ_start = NULL;
        GCVTable *vt;
-       char *old_start = NULL; /* just for debugging */
+       char *old_start G_GNUC_UNUSED = NULL; /* just for debugging */
 
        fprintf (heap_dump_file, "<section type=\"%s\" size=\"%lu\">\n", type, (unsigned long)section->size);
 
        while (start < end) {
                guint size;
-               MonoClass *class;
+               MonoClass *class G_GNUC_UNUSED;
 
                if (!*(void**)start) {
                        if (occ_start) {
@@ -1934,32 +1896,32 @@ init_stats (void)
        if (inited)
                return;
 
-       mono_counters_register ("Collection max time",  MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME | MONO_COUNTER_MONOTONIC, &time_max);
-
-       mono_counters_register ("Minor fragment clear", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_pre_collection_fragment_clear);
-       mono_counters_register ("Minor pinning", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_pinning);
-       mono_counters_register ("Minor scan remembered set", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_scan_remsets);
-       mono_counters_register ("Minor scan pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_scan_pinned);
-       mono_counters_register ("Minor scan registered roots", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_scan_registered_roots);
-       mono_counters_register ("Minor scan thread data", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_scan_thread_data);
-       mono_counters_register ("Minor finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_finish_gray_stack);
-       mono_counters_register ("Minor fragment creation", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_minor_fragment_creation);
-
-       mono_counters_register ("Major fragment clear", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_pre_collection_fragment_clear);
-       mono_counters_register ("Major pinning", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_pinning);
-       mono_counters_register ("Major scan pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_scan_pinned);
-       mono_counters_register ("Major scan registered roots", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_scan_registered_roots);
-       mono_counters_register ("Major scan thread data", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_scan_thread_data);
-       mono_counters_register ("Major scan alloc_pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_scan_alloc_pinned);
-       mono_counters_register ("Major scan finalized", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_scan_finalized);
-       mono_counters_register ("Major scan big objects", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_scan_big_objects);
-       mono_counters_register ("Major finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_finish_gray_stack);
-       mono_counters_register ("Major free big objects", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_free_bigobjs);
-       mono_counters_register ("Major LOS sweep", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_los_sweep);
-       mono_counters_register ("Major sweep", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_sweep);
-       mono_counters_register ("Major fragment creation", MONO_COUNTER_GC | MONO_COUNTER_LONG | MONO_COUNTER_TIME, &time_major_fragment_creation);
-
-       mono_counters_register ("Number of pinned objects", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_pinned_objects);
+       mono_counters_register ("Collection max time",  MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME | MONO_COUNTER_MONOTONIC, &time_max);
+
+       mono_counters_register ("Minor fragment clear", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pre_collection_fragment_clear);
+       mono_counters_register ("Minor pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pinning);
+       mono_counters_register ("Minor scan remembered set", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_remsets);
+       mono_counters_register ("Minor scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_pinned);
+       mono_counters_register ("Minor scan registered roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_registered_roots);
+       mono_counters_register ("Minor scan thread data", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_thread_data);
+       mono_counters_register ("Minor finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_finish_gray_stack);
+       mono_counters_register ("Minor fragment creation", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_fragment_creation);
+
+       mono_counters_register ("Major fragment clear", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_pre_collection_fragment_clear);
+       mono_counters_register ("Major pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_pinning);
+       mono_counters_register ("Major scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_pinned);
+       mono_counters_register ("Major scan registered roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_registered_roots);
+       mono_counters_register ("Major scan thread data", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_thread_data);
+       mono_counters_register ("Major scan alloc_pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_alloc_pinned);
+       mono_counters_register ("Major scan finalized", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_finalized);
+       mono_counters_register ("Major scan big objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_big_objects);
+       mono_counters_register ("Major finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_finish_gray_stack);
+       mono_counters_register ("Major free big objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_free_bigobjs);
+       mono_counters_register ("Major LOS sweep", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_los_sweep);
+       mono_counters_register ("Major sweep", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_sweep);
+       mono_counters_register ("Major fragment creation", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_fragment_creation);
+
+       mono_counters_register ("Number of pinned objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_pinned_objects);
 
 #ifdef HEAVY_STATISTICS
        mono_counters_register ("WBarrier remember pointer", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_wbarrier_add_to_global_remset);
@@ -1972,23 +1934,23 @@ init_stats (void)
        mono_counters_register ("WBarrier value copy", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_wbarrier_value_copy);
        mono_counters_register ("WBarrier object copy", MONO_COUNTER_GC | MONO_COUNTER_INT, &stat_wbarrier_object_copy);
 
-       mono_counters_register ("# objects allocated degraded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_objects_alloced_degraded);
-       mono_counters_register ("bytes allocated degraded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_bytes_alloced_degraded);
+       mono_counters_register ("# objects allocated degraded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_alloced_degraded);
+       mono_counters_register ("bytes allocated degraded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_bytes_alloced_degraded);
 
-       mono_counters_register ("# copy_object() called (nursery)", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_copy_object_called_nursery);
-       mono_counters_register ("# objects copied (nursery)", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_objects_copied_nursery);
-       mono_counters_register ("# copy_object() called (major)", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_copy_object_called_major);
-       mono_counters_register ("# objects copied (major)", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_objects_copied_major);
+       mono_counters_register ("# copy_object() called (nursery)", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_copy_object_called_nursery);
+       mono_counters_register ("# objects copied (nursery)", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_copied_nursery);
+       mono_counters_register ("# copy_object() called (major)", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_copy_object_called_major);
+       mono_counters_register ("# objects copied (major)", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_objects_copied_major);
 
-       mono_counters_register ("# scan_object() called (nursery)", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_scan_object_called_nursery);
-       mono_counters_register ("# scan_object() called (major)", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_scan_object_called_major);
+       mono_counters_register ("# scan_object() called (nursery)", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_scan_object_called_nursery);
+       mono_counters_register ("# scan_object() called (major)", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_scan_object_called_major);
 
-       mono_counters_register ("Slots allocated in vain", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_slots_allocated_in_vain);
+       mono_counters_register ("Slots allocated in vain", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_slots_allocated_in_vain);
 
-       mono_counters_register ("# nursery copy_object() failed from space", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_nursery_copy_object_failed_from_space);
-       mono_counters_register ("# nursery copy_object() failed forwarded", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_nursery_copy_object_failed_forwarded);
-       mono_counters_register ("# nursery copy_object() failed pinned", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_nursery_copy_object_failed_pinned);
-       mono_counters_register ("# nursery copy_object() failed to space", MONO_COUNTER_GC | MONO_COUNTER_LONG, &stat_nursery_copy_object_failed_to_space);
+       mono_counters_register ("# nursery copy_object() failed from space", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_nursery_copy_object_failed_from_space);
+       mono_counters_register ("# nursery copy_object() failed forwarded", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_nursery_copy_object_failed_forwarded);
+       mono_counters_register ("# nursery copy_object() failed pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_nursery_copy_object_failed_pinned);
+       mono_counters_register ("# nursery copy_object() failed to space", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_nursery_copy_object_failed_to_space);
 
        sgen_nursery_allocator_init_heavy_stats ();
        sgen_alloc_init_heavy_stats ();
@@ -2029,19 +1991,10 @@ sgen_concurrent_collection_in_progress (void)
        return concurrent_collection_in_progress;
 }
 
-typedef struct
-{
-       char *heap_start;
-       char *heap_end;
-} FinishRememberedSetScanJobData;
-
 static void
-job_finish_remembered_set_scan (WorkerData *worker_data, void *job_data_untyped)
+job_remembered_set_scan (WorkerData *worker_data, void *dummy)
 {
-       FinishRememberedSetScanJobData *job_data = job_data_untyped;
-
-       remset.finish_scan_remsets (job_data->heap_start, job_data->heap_end, sgen_workers_get_job_gray_queue (worker_data));
-       sgen_free_internal_dynamic (job_data, sizeof (FinishRememberedSetScanJobData), INTERNAL_MEM_WORKER_JOB_DATA);
+       remset.scan_remsets (sgen_workers_get_job_gray_queue (worker_data));
 }
 
 typedef struct
@@ -2171,13 +2124,13 @@ verify_nursery (void)
 static void
 check_nursery_is_clean (void)
 {
-       char *start, *end, *cur;
+       char *end, *cur;
 
-       start = cur = sgen_get_nursery_start ();
+       cur = sgen_get_nursery_start ();
        end = sgen_get_nursery_end ();
 
        while (cur < end) {
-               size_t ss, size;
+               size_t size;
 
                if (!*(void**)cur) {
                        cur += sizeof (void*);
@@ -2187,7 +2140,6 @@ check_nursery_is_clean (void)
                g_assert (!object_is_forwarded (cur));
                g_assert (!object_is_pinned (cur));
 
-               ss = safe_object_get_size ((MonoObject*)cur);
                size = ALIGN_UP (safe_object_get_size ((MonoObject*)cur));
                verify_scan_starts (cur, cur + size);
 
@@ -2214,7 +2166,6 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        gboolean needs_major;
        size_t max_garbage_amount;
        char *nursery_next;
-       FinishRememberedSetScanJobData *frssjd;
        ScanFromRegisteredRootsJobData *scrrjd_normal, *scrrjd_wbarrier;
        ScanThreadDataJobData *stdjd;
        mword fragment_total;
@@ -2311,10 +2262,12 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
 
        MONO_GC_CHECKPOINT_3 (GENERATION_NURSERY);
 
-       frssjd = sgen_alloc_internal_dynamic (sizeof (FinishRememberedSetScanJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       frssjd->heap_start = sgen_get_nursery_start ();
-       frssjd->heap_end = nursery_next;
-       sgen_workers_enqueue_job (job_finish_remembered_set_scan, frssjd);
+       /*
+        * FIXME: When we finish a concurrent collection we do a nursery collection first,
+        * as part of which we scan the card table.  Then, later, we scan the mod union
+        * cardtable.  We should only have to do one.
+        */
+       sgen_workers_enqueue_job ("scan remset", job_remembered_set_scan, NULL);
 
        /* we don't have complete write barrier yet, so we scan all the old generation sections */
        TV_GETTIME (btv);
@@ -2345,7 +2298,7 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        scrrjd_normal->heap_start = sgen_get_nursery_start ();
        scrrjd_normal->heap_end = nursery_next;
        scrrjd_normal->root_type = ROOT_TYPE_NORMAL;
-       sgen_workers_enqueue_job (job_scan_from_registered_roots, scrrjd_normal);
+       sgen_workers_enqueue_job ("scan from registered roots normal", job_scan_from_registered_roots, scrrjd_normal);
 
        scrrjd_wbarrier = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
        scrrjd_wbarrier->copy_or_mark_func = current_object_ops.copy_or_mark_object;
@@ -2353,7 +2306,7 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        scrrjd_wbarrier->heap_start = sgen_get_nursery_start ();
        scrrjd_wbarrier->heap_end = nursery_next;
        scrrjd_wbarrier->root_type = ROOT_TYPE_WBARRIER;
-       sgen_workers_enqueue_job (job_scan_from_registered_roots, scrrjd_wbarrier);
+       sgen_workers_enqueue_job ("scan from registered roots wbarrier", job_scan_from_registered_roots, scrrjd_wbarrier);
 
        TV_GETTIME (btv);
        time_minor_scan_registered_roots += TV_ELAPSED (atv, btv);
@@ -2364,7 +2317,7 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        stdjd = sgen_alloc_internal_dynamic (sizeof (ScanThreadDataJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
        stdjd->heap_start = sgen_get_nursery_start ();
        stdjd->heap_end = nursery_next;
-       sgen_workers_enqueue_job (job_scan_thread_data, stdjd);
+       sgen_workers_enqueue_job ("scan thread data", job_scan_thread_data, stdjd);
 
        TV_GETTIME (atv);
        time_minor_scan_thread_data += TV_ELAPSED (btv, atv);
@@ -2375,8 +2328,8 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        g_assert (!sgen_collection_is_concurrent ());
 
        /* Scan the list of objects ready for finalization. */
-       sgen_workers_enqueue_job (job_scan_finalizer_entries, fin_ready_list);
-       sgen_workers_enqueue_job (job_scan_finalizer_entries, critical_fin_list);
+       sgen_workers_enqueue_job ("scan finalizer entries", job_scan_finalizer_entries, fin_ready_list);
+       sgen_workers_enqueue_job ("scan critical finalizer entries", job_scan_finalizer_entries, critical_fin_list);
 
        MONO_GC_CHECKPOINT_8 (GENERATION_NURSERY);
 
@@ -2527,7 +2480,6 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
         */
 
        objects_pinned = 0;
-       *major_collector.have_swept = FALSE;
 
        if (xdomain_checks) {
                sgen_clear_nursery_fragments ();
@@ -2536,7 +2488,7 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
 
        if (!concurrent_collection_in_progress) {
                /* Remsets are not useful for a major collection */
-               remset.prepare_for_major_collection ();
+               remset.clear_cards ();
        }
 
        sgen_process_fin_stage_entries ();
@@ -2558,9 +2510,6 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
                         */
                        sgen_pin_cemented_objects ();
                }
-
-               if (!concurrent_collection_in_progress)
-                       sgen_cement_reset ();
        }
 
        sgen_optimize_pin_queue ();
@@ -2580,7 +2529,6 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
        SGEN_LOG (6, "Pinning from sections");
        /* first pass for the sections */
        sgen_find_section_pin_queue_start_end (nursery_section);
-       major_collector.find_pin_queue_start_ends (WORKERS_DISTRIBUTE_GRAY_QUEUE);
        /* identify possible pointers to the inside of large objects */
        SGEN_LOG (6, "Pinning from large objects");
        for (bigobj = los_object_list; bigobj; bigobj = bigobj->next) {
@@ -2599,9 +2547,7 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
                                g_assert (finish_up_concurrent_mark);
                                continue;
                        }
-#ifdef SGEN_MARK_ON_ENQUEUE
                        sgen_los_pin_object (bigobj->data);
-#endif
                        if (SGEN_OBJECT_HAS_REFERENCES (bigobj->data))
                                GRAY_OBJECT_ENQUEUE (WORKERS_DISTRIBUTE_GRAY_QUEUE, bigobj->data, sgen_obj_get_descriptor (bigobj->data));
                        if (G_UNLIKELY (do_pin_stats))
@@ -2691,7 +2637,7 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
        scrrjd_normal->heap_start = heap_start;
        scrrjd_normal->heap_end = heap_end;
        scrrjd_normal->root_type = ROOT_TYPE_NORMAL;
-       sgen_workers_enqueue_job (job_scan_from_registered_roots, scrrjd_normal);
+       sgen_workers_enqueue_job ("scan from registered roots normal", job_scan_from_registered_roots, scrrjd_normal);
 
        scrrjd_wbarrier = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
        scrrjd_wbarrier->copy_or_mark_func = current_object_ops.copy_or_mark_object;
@@ -2699,7 +2645,7 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
        scrrjd_wbarrier->heap_start = heap_start;
        scrrjd_wbarrier->heap_end = heap_end;
        scrrjd_wbarrier->root_type = ROOT_TYPE_WBARRIER;
-       sgen_workers_enqueue_job (job_scan_from_registered_roots, scrrjd_wbarrier);
+       sgen_workers_enqueue_job ("scan from registered roots wbarrier", job_scan_from_registered_roots, scrrjd_wbarrier);
 
        TV_GETTIME (btv);
        time_major_scan_registered_roots += TV_ELAPSED (atv, btv);
@@ -2708,7 +2654,7 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
        stdjd = sgen_alloc_internal_dynamic (sizeof (ScanThreadDataJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
        stdjd->heap_start = heap_start;
        stdjd->heap_end = heap_end;
-       sgen_workers_enqueue_job (job_scan_thread_data, stdjd);
+       sgen_workers_enqueue_job ("scan thread data", job_scan_thread_data, stdjd);
 
        TV_GETTIME (atv);
        time_major_scan_thread_data += TV_ELAPSED (btv, atv);
@@ -2720,15 +2666,15 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
                report_finalizer_roots ();
 
        /* scan the list of objects ready for finalization */
-       sgen_workers_enqueue_job (job_scan_finalizer_entries, fin_ready_list);
-       sgen_workers_enqueue_job (job_scan_finalizer_entries, critical_fin_list);
+       sgen_workers_enqueue_job ("scan finalizer entries", job_scan_finalizer_entries, fin_ready_list);
+       sgen_workers_enqueue_job ("scan critical finalizer entries", job_scan_finalizer_entries, critical_fin_list);
 
        if (scan_mod_union) {
                g_assert (finish_up_concurrent_mark);
 
                /* Mod union card table */
-               sgen_workers_enqueue_job (job_scan_major_mod_union_cardtable, NULL);
-               sgen_workers_enqueue_job (job_scan_los_mod_union_cardtable, NULL);
+               sgen_workers_enqueue_job ("scan mod union cardtable", job_scan_major_mod_union_cardtable, NULL);
+               sgen_workers_enqueue_job ("scan LOS mod union cardtable", job_scan_los_mod_union_cardtable, NULL);
        }
 
        TV_GETTIME (atv);
@@ -2737,16 +2683,25 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
 
        TV_GETTIME (btv);
        time_major_scan_big_objects += TV_ELAPSED (atv, btv);
+}
 
-       if (concurrent_collection_in_progress) {
-               /* prepare the pin queue for the next collection */
-               sgen_finish_pinning ();
+static void
+major_finish_copy_or_mark (void)
+{
+       if (!concurrent_collection_in_progress)
+               return;
 
-               sgen_pin_stats_reset ();
+       /*
+        * Prepare the pin queue for the next collection.  Since pinning runs on the worker
+        * threads we must wait for the jobs to finish before we can reset it.
+        */
+       sgen_workers_wait_for_jobs_finished ();
+       sgen_finish_pinning ();
 
-               if (do_concurrent_checks)
-                       check_nursery_is_clean ();
-       }
+       sgen_pin_stats_reset ();
+
+       if (do_concurrent_checks)
+               check_nursery_is_clean ();
 }
 
 static void
@@ -2762,12 +2717,12 @@ major_start_collection (gboolean concurrent, size_t *old_next_pin_slot)
 
        g_assert (sgen_section_gray_queue_is_empty (sgen_workers_get_distribute_section_gray_queue ()));
 
+       sgen_cement_reset ();
+
        if (concurrent) {
                g_assert (major_collector.is_concurrent);
                concurrent_collection_in_progress = TRUE;
 
-               sgen_cement_concurrent_start ();
-
                current_object_ops = major_collector.major_concurrent_ops;
        } else {
                current_object_ops = major_collector.major_ops;
@@ -2790,6 +2745,7 @@ major_start_collection (gboolean concurrent, size_t *old_next_pin_slot)
                major_collector.start_major_collection ();
 
        major_copy_or_mark_from_roots (old_next_pin_slot, concurrent, FALSE, FALSE, FALSE);
+       major_finish_copy_or_mark ();
 }
 
 static void
@@ -2800,7 +2756,7 @@ wait_for_workers_to_finish (void)
 }
 
 static void
-major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean scan_mod_union, gboolean scan_whole_nursery)
+major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean scan_whole_nursery)
 {
        ScannedObjectCounts counts;
        LOSObject *bigobj, *prevbo;
@@ -2814,9 +2770,11 @@ major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean
 
                current_object_ops = major_collector.major_concurrent_ops;
 
-               major_copy_or_mark_from_roots (NULL, FALSE, TRUE, scan_mod_union, scan_whole_nursery);
+               major_copy_or_mark_from_roots (NULL, FALSE, TRUE, TRUE, scan_whole_nursery);
 
                sgen_workers_signal_finish_nursery_collection ();
+
+               major_finish_copy_or_mark ();
                gray_queue_enable_redirect (WORKERS_DISTRIBUTE_GRAY_QUEUE);
 
                sgen_workers_join ();
@@ -2899,8 +2857,6 @@ major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean
                sgen_pin_stats_reset ();
        }
 
-       if (concurrent_collection_in_progress)
-               sgen_cement_concurrent_finish ();
        sgen_cement_clear_below_threshold ();
 
        if (check_mark_bits_after_major_collection)
@@ -3001,7 +2957,7 @@ major_do_collection (const char *reason)
        TV_GETTIME (time_start);
 
        major_start_collection (FALSE, &old_next_pin_slot);
-       major_finish_collection (reason, old_next_pin_slot, FALSE, FALSE);
+       major_finish_collection (reason, old_next_pin_slot, FALSE);
 
        TV_GETTIME (time_end);
        gc_stats.major_gc_time += TV_ELAPSED (time_start, time_end);
@@ -3036,7 +2992,6 @@ major_start_concurrent_collection (const char *reason)
        major_start_collection (TRUE, NULL);
 
        gray_queue_redirect (&gray_queue);
-       sgen_workers_wait_for_jobs_finished ();
 
        num_objects_marked = major_collector.get_and_reset_num_major_objects_marked ();
        MONO_GC_CONCURRENT_START_END (GENERATION_OLD, num_objects_marked);
@@ -3047,6 +3002,9 @@ major_start_concurrent_collection (const char *reason)
        current_collection_generation = -1;
 }
 
+/*
+ * Returns whether the major collection has finished.
+ */
 static gboolean
 major_should_finish_concurrent_collection (void)
 {
@@ -3063,7 +3021,7 @@ major_update_concurrent_collection (void)
        TV_GETTIME (total_start);
 
        MONO_GC_CONCURRENT_UPDATE_FINISH_BEGIN (GENERATION_OLD, major_collector.get_and_reset_num_major_objects_marked ());
-       binary_protocol_concurrent_update_finish ();
+       binary_protocol_concurrent_update ();
 
        major_collector.update_cardtable_mod_union ();
        sgen_los_update_cardtable_mod_union ();
@@ -3086,7 +3044,7 @@ major_finish_concurrent_collection (void)
        TV_GETTIME (total_start);
 
        MONO_GC_CONCURRENT_UPDATE_FINISH_BEGIN (GENERATION_OLD, major_collector.get_and_reset_num_major_objects_marked ());
-       binary_protocol_concurrent_update_finish ();
+       binary_protocol_concurrent_finish ();
 
        /*
         * The major collector can add global remsets which are processed in the finishing
@@ -3108,7 +3066,7 @@ major_finish_concurrent_collection (void)
                sgen_check_mod_union_consistency ();
 
        current_collection_generation = GENERATION_OLD;
-       major_finish_collection ("finishing", -1, TRUE, late_pinned);
+       major_finish_collection ("finishing", -1, late_pinned);
 
        if (whole_heap_check_before_collection)
                sgen_check_whole_heap (FALSE);
@@ -3480,7 +3438,10 @@ null_ephemerons_for_domain (MonoDomain *domain)
        while (current) {
                MonoObject *object = (MonoObject*)current->array;
 
-               if (object && !object->vtable) {
+               if (object)
+                       SGEN_ASSERT (0, object->vtable, "Can't have objects without vtables.");
+
+               if (object && object->vtable->domain == domain) {
                        EphemeronLinkNode *tmp = current;
 
                        if (prev)
@@ -3819,10 +3780,11 @@ scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise, Gray
                        SGEN_LOG (3, "GC disabled for thread %p, range: %p-%p, size: %td", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start);
                        continue;
                }
-               if (mono_thread_info_run_state (info) != STATE_RUNNING) {
-                       SGEN_LOG (3, "Skipping non-running thread %p, range: %p-%p, size: %td (state %d)", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, mono_thread_info_run_state (info));
+               if (!mono_thread_info_is_live (info)) {
+                       SGEN_LOG (3, "Skipping non-running thread %p, range: %p-%p, size: %td (state %x)", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, info->info.thread_state);
                        continue;
                }
+               g_assert (info->suspend_done);
                SGEN_LOG (3, "Scanning thread %p, range: %p-%p, size: %td, pinned=%zd", info, info->stack_start, info->stack_end, (char*)info->stack_end - (char*)info->stack_start, sgen_get_pinned_count ());
                if (gc_callbacks.thread_mark_func && !conservative_stack_mark) {
                        UserCopyOrMarkData data = { NULL, queue };
@@ -4089,46 +4051,6 @@ mono_gc_wbarrier_arrayref_copy (gpointer dest_ptr, gpointer src_ptr, int count)
        remset.wbarrier_arrayref_copy (dest_ptr, src_ptr, count);
 }
 
-static char *found_obj;
-
-static void
-find_object_for_ptr_callback (char *obj, size_t size, void *user_data)
-{
-       char *ptr = user_data;
-
-       if (ptr >= obj && ptr < obj + size) {
-               g_assert (!found_obj);
-               found_obj = obj;
-       }
-}
-
-/* for use in the debugger */
-char* find_object_for_ptr (char *ptr);
-char*
-find_object_for_ptr (char *ptr)
-{
-       if (ptr >= nursery_section->data && ptr < nursery_section->end_data) {
-               found_obj = NULL;
-               sgen_scan_area_with_callback (nursery_section->data, nursery_section->end_data,
-                               find_object_for_ptr_callback, ptr, TRUE);
-               if (found_obj)
-                       return found_obj;
-       }
-
-       found_obj = NULL;
-       sgen_los_iterate_objects (find_object_for_ptr_callback, ptr);
-       if (found_obj)
-               return found_obj;
-
-       /*
-        * Very inefficient, but this is debugging code, supposed to
-        * be called from gdb, so we don't care.
-        */
-       found_obj = NULL;
-       major_collector.iterate_objects (ITERATE_OBJECTS_SWEEP_ALL, find_object_for_ptr_callback, ptr);
-       return found_obj;
-}
-
 void
 mono_gc_wbarrier_generic_nostore (gpointer ptr)
 {
@@ -4139,7 +4061,7 @@ mono_gc_wbarrier_generic_nostore (gpointer ptr)
 #ifdef XDOMAIN_CHECKS_IN_WBARRIER
        /* FIXME: ptr_in_heap must be called with the GC lock held */
        if (xdomain_checks && *(MonoObject**)ptr && ptr_in_heap (ptr)) {
-               char *start = find_object_for_ptr (ptr);
+               char *start = sgen_find_object_for_ptr (ptr);
                MonoObject *value = *(MonoObject**)ptr;
                LOCK_GC;
                g_assert (start);
@@ -4615,6 +4537,12 @@ parse_double_in_interval (const char *env_var, const char *opt_name, const char
        return TRUE;
 }
 
+static gboolean
+thread_in_critical_region (SgenThreadInfo *info)
+{
+       return info->in_critical_region;
+}
+
 void
 mono_gc_base_init (void)
 {
@@ -4629,7 +4557,6 @@ mono_gc_base_init (void)
        int dummy;
        gboolean debug_print_allowance = FALSE;
        double allowance_ratio = 0, save_target = 0;
-       gboolean have_split_nursery = FALSE;
        gboolean cement_enabled = TRUE;
 
        mono_counters_init ();
@@ -4664,6 +4591,7 @@ mono_gc_base_init (void)
        cb.thread_unregister = sgen_thread_unregister;
        cb.thread_attach = sgen_thread_attach;
        cb.mono_method_is_critical = (gpointer)is_critical_method;
+       cb.mono_thread_in_critical_region = thread_in_critical_region;
 #ifndef HOST_WIN32
        cb.thread_exit = mono_gc_pthread_exit;
        cb.mono_gc_pthread_create = (gpointer)mono_gc_pthread_create;
@@ -4737,7 +4665,6 @@ mono_gc_base_init (void)
                        sgen_simple_nursery_init (&sgen_minor_collector);
                } else if (!strcmp (minor_collector_opt, "split")) {
                        sgen_split_nursery_init (&sgen_minor_collector);
-                       have_split_nursery = TRUE;
                } else {
                        sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using `simple` instead.", "Unknown minor collector `%s'.", minor_collector_opt);
                        goto use_simple_nursery;
@@ -4760,6 +4687,9 @@ mono_gc_base_init (void)
 
        sgen_nursery_size = DEFAULT_NURSERY_SIZE;
 
+       if (major_collector.is_concurrent)
+               cement_enabled = FALSE;
+
        if (opts) {
                gboolean usage_printed = FALSE;
 
@@ -4822,7 +4752,6 @@ mono_gc_base_init (void)
                                size_t val;
                                opt = strchr (opt, '=') + 1;
                                if (*opt && mono_gc_parse_environment_string_extract_number (opt, &val)) {
-#ifdef SGEN_ALIGN_NURSERY
                                        if ((val & (val - 1))) {
                                                sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using default value.", "`nursery-size` must be a power of two.");
                                                continue;
@@ -4838,9 +4767,6 @@ mono_gc_base_init (void)
                                        sgen_nursery_bits = 0;
                                        while (ONE_P << (++ sgen_nursery_bits) != sgen_nursery_size)
                                                ;
-#else
-                                       sgen_nursery_size = val;
-#endif
                                } else {
                                        sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using default value.", "`nursery-size` must be an integer.");
                                        continue;
@@ -4940,6 +4866,11 @@ mono_gc_base_init (void)
 
        alloc_nursery ();
 
+       if (major_collector.is_concurrent && cement_enabled) {
+               sgen_env_var_error (MONO_GC_PARAMS_NAME, "Ignoring.", "`cementing` is not supported on concurrent major collectors.");
+               cement_enabled = FALSE;
+       }
+
        sgen_cement_init (cement_enabled);
 
        if ((env = g_getenv (MONO_GC_DEBUG_NAME))) {
@@ -5047,6 +4978,10 @@ mono_gc_base_init (void)
                                do_verify_nursery = TRUE;
                                sgen_set_use_managed_allocator (FALSE);
                                enable_nursery_canaries = TRUE;
+                       } else if (!strcmp (opt, "do-not-finalize")) {
+                               do_not_finalize = TRUE;
+                       } else if (!strcmp (opt, "log-finalizers")) {
+                               log_finalizers = TRUE;
                        } else if (!sgen_bridge_handle_gc_debug (opt)) {
                                sgen_env_var_error (MONO_GC_DEBUG_NAME, "Ignoring.", "Unknown option `%s`.", opt);
 
@@ -5077,6 +5012,8 @@ mono_gc_base_init (void)
                                fprintf (stderr, "  heap-dump=<filename>\n");
                                fprintf (stderr, "  binary-protocol=<filename>[:<file-size-limit>]\n");
                                fprintf (stderr, "  nursery-canaries\n");
+                               fprintf (stderr, "  do-not-finalize\n");
+                               fprintf (stderr, "  log-finalizers\n");
                                sgen_bridge_print_gc_debug_usage ();
                                fprintf (stderr, "\n");
 
@@ -5107,97 +5044,67 @@ mono_gc_get_gc_name (void)
        return "sgen";
 }
 
-static MonoMethod *write_barrier_method;
+static MonoMethod *write_barrier_conc_method;
+static MonoMethod *write_barrier_noconc_method;
 
 gboolean
 sgen_is_critical_method (MonoMethod *method)
 {
-       return (method == write_barrier_method || sgen_is_managed_allocator (method));
+       return (method == write_barrier_conc_method || method == write_barrier_noconc_method || sgen_is_managed_allocator (method));
 }
 
 gboolean
 sgen_has_critical_method (void)
 {
-       return write_barrier_method || sgen_has_managed_allocator ();
+       return write_barrier_conc_method || write_barrier_noconc_method || sgen_has_managed_allocator ();
 }
 
 #ifndef DISABLE_JIT
 
 static void
-emit_nursery_check (MonoMethodBuilder *mb, int *nursery_check_return_labels)
+emit_nursery_check (MonoMethodBuilder *mb, int *nursery_check_return_labels, gboolean is_concurrent)
 {
-       memset (nursery_check_return_labels, 0, sizeof (int) * 3);
-#ifdef SGEN_ALIGN_NURSERY
+       int shifted_nursery_start = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
+
+       memset (nursery_check_return_labels, 0, sizeof (int) * 2);
        // if (ptr_in_nursery (ptr)) return;
        /*
         * Masking out the bits might be faster, but we would have to use 64 bit
         * immediates, which might be slower.
         */
+       mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
+       mono_mb_emit_byte (mb, CEE_MONO_LDPTR_NURSERY_START);
+       mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS);
+       mono_mb_emit_byte (mb, CEE_SHR_UN);
+       mono_mb_emit_stloc (mb, shifted_nursery_start);
+
        mono_mb_emit_ldarg (mb, 0);
        mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS);
        mono_mb_emit_byte (mb, CEE_SHR_UN);
-       mono_mb_emit_ptr (mb, (gpointer)((mword)sgen_get_nursery_start () >> DEFAULT_NURSERY_BITS));
+       mono_mb_emit_ldloc (mb, shifted_nursery_start);
        nursery_check_return_labels [0] = mono_mb_emit_branch (mb, CEE_BEQ);
 
-       if (!major_collector.is_concurrent) {
+       if (!is_concurrent) {
                // if (!ptr_in_nursery (*ptr)) return;
                mono_mb_emit_ldarg (mb, 0);
                mono_mb_emit_byte (mb, CEE_LDIND_I);
                mono_mb_emit_icon (mb, DEFAULT_NURSERY_BITS);
                mono_mb_emit_byte (mb, CEE_SHR_UN);
-               mono_mb_emit_ptr (mb, (gpointer)((mword)sgen_get_nursery_start () >> DEFAULT_NURSERY_BITS));
+               mono_mb_emit_ldloc (mb, shifted_nursery_start);
                nursery_check_return_labels [1] = mono_mb_emit_branch (mb, CEE_BNE_UN);
        }
-#else
-       int label_continue1, label_continue2;
-       int dereferenced_var;
-
-       // if (ptr < (sgen_get_nursery_start ())) goto continue;
-       mono_mb_emit_ldarg (mb, 0);
-       mono_mb_emit_ptr (mb, (gpointer) sgen_get_nursery_start ());
-       label_continue_1 = mono_mb_emit_branch (mb, CEE_BLT);
-
-       // if (ptr >= sgen_get_nursery_end ())) goto continue;
-       mono_mb_emit_ldarg (mb, 0);
-       mono_mb_emit_ptr (mb, (gpointer) sgen_get_nursery_end ());
-       label_continue_2 = mono_mb_emit_branch (mb, CEE_BGE);
-
-       // Otherwise return
-       nursery_check_return_labels [0] = mono_mb_emit_branch (mb, CEE_BR);
-
-       // continue:
-       mono_mb_patch_branch (mb, label_continue_1);
-       mono_mb_patch_branch (mb, label_continue_2);
-
-       // Dereference and store in local var
-       dereferenced_var = mono_mb_add_local (mb, &mono_defaults.int_class->byval_arg);
-       mono_mb_emit_ldarg (mb, 0);
-       mono_mb_emit_byte (mb, CEE_LDIND_I);
-       mono_mb_emit_stloc (mb, dereferenced_var);
-
-       if (!major_collector.is_concurrent) {
-               // if (*ptr < sgen_get_nursery_start ()) return;
-               mono_mb_emit_ldloc (mb, dereferenced_var);
-               mono_mb_emit_ptr (mb, (gpointer) sgen_get_nursery_start ());
-               nursery_check_return_labels [1] = mono_mb_emit_branch (mb, CEE_BLT);
-
-               // if (*ptr >= sgen_get_nursery_end ()) return;
-               mono_mb_emit_ldloc (mb, dereferenced_var);
-               mono_mb_emit_ptr (mb, (gpointer) sgen_get_nursery_end ());
-               nursery_check_return_labels [2] = mono_mb_emit_branch (mb, CEE_BGE);
-       }
-#endif 
 }
 #endif
 
 MonoMethod*
-mono_gc_get_write_barrier (void)
+mono_gc_get_specific_write_barrier (gboolean is_concurrent)
 {
        MonoMethod *res;
        MonoMethodBuilder *mb;
        MonoMethodSignature *sig;
+       MonoMethod **write_barrier_method_addr;
 #ifdef MANAGED_WBARRIER
-       int i, nursery_check_labels [3];
+       int i, nursery_check_labels [2];
 
 #ifdef HAVE_KW_THREAD
        int stack_end_offset = -1;
@@ -5209,19 +5116,27 @@ mono_gc_get_write_barrier (void)
 
        // FIXME: Maybe create a separate version for ctors (the branch would be
        // correctly predicted more times)
-       if (write_barrier_method)
-               return write_barrier_method;
+       if (is_concurrent)
+               write_barrier_method_addr = &write_barrier_conc_method;
+       else
+               write_barrier_method_addr = &write_barrier_noconc_method;
+
+       if (*write_barrier_method_addr)
+               return *write_barrier_method_addr;
 
        /* Create the IL version of mono_gc_barrier_generic_store () */
        sig = mono_metadata_signature_alloc (mono_defaults.corlib, 1);
        sig->ret = &mono_defaults.void_class->byval_arg;
        sig->params [0] = &mono_defaults.int_class->byval_arg;
 
-       mb = mono_mb_new (mono_defaults.object_class, "wbarrier", MONO_WRAPPER_WRITE_BARRIER);
+       if (is_concurrent)
+               mb = mono_mb_new (mono_defaults.object_class, "wbarrier_conc", MONO_WRAPPER_WRITE_BARRIER);
+       else
+               mb = mono_mb_new (mono_defaults.object_class, "wbarrier_noconc", MONO_WRAPPER_WRITE_BARRIER);
 
 #ifndef DISABLE_JIT
 #ifdef MANAGED_WBARRIER
-       emit_nursery_check (mb, nursery_check_labels);
+       emit_nursery_check (mb, nursery_check_labels, is_concurrent);
        /*
        addr = sgen_cardtable + ((address >> CARD_BITS) & CARD_MASK)
        *addr = 1;
@@ -5241,12 +5156,19 @@ mono_gc_get_write_barrier (void)
        ldc_i4_1
        stind_i1
        */
-       mono_mb_emit_ptr (mb, sgen_cardtable);
+       mono_mb_emit_byte (mb, MONO_CUSTOM_PREFIX);
+       mono_mb_emit_byte (mb, CEE_MONO_LDPTR_CARD_TABLE);
        mono_mb_emit_ldarg (mb, 0);
        mono_mb_emit_icon (mb, CARD_BITS);
        mono_mb_emit_byte (mb, CEE_SHR_UN);
+       mono_mb_emit_byte (mb, CEE_CONV_I);
 #ifdef SGEN_HAVE_OVERLAPPING_CARDS
-       mono_mb_emit_ptr (mb, (gpointer)CARD_MASK);
+#if SIZEOF_VOID_P == 8
+       mono_mb_emit_icon8 (mb, CARD_MASK);
+#else
+       mono_mb_emit_icon (mb, CARD_MASK);
+#endif
+       mono_mb_emit_byte (mb, CEE_CONV_I);
        mono_mb_emit_byte (mb, CEE_AND);
 #endif
        mono_mb_emit_byte (mb, CEE_ADD);
@@ -5254,7 +5176,7 @@ mono_gc_get_write_barrier (void)
        mono_mb_emit_byte (mb, CEE_STIND_I1);
 
        // return;
-       for (i = 0; i < 3; ++i) {
+       for (i = 0; i < 2; ++i) {
                if (nursery_check_labels [i])
                        mono_mb_patch_branch (mb, nursery_check_labels [i]);
        }
@@ -5269,17 +5191,23 @@ mono_gc_get_write_barrier (void)
        mono_mb_free (mb);
 
        LOCK_GC;
-       if (write_barrier_method) {
+       if (*write_barrier_method_addr) {
                /* Already created */
                mono_free_method (res);
        } else {
                /* double-checked locking */
                mono_memory_barrier ();
-               write_barrier_method = res;
+               *write_barrier_method_addr = res;
        }
        UNLOCK_GC;
 
-       return write_barrier_method;
+       return *write_barrier_method_addr;
+}
+
+MonoMethod*
+mono_gc_get_write_barrier (void)
+{
+       return mono_gc_get_specific_write_barrier (major_collector.is_concurrent);
 }
 
 char*
@@ -5323,7 +5251,8 @@ sgen_get_array_fill_vtable (void)
 {
        if (!array_fill_vtable) {
                static MonoClass klass;
-               static MonoVTable vtable;
+               static char _vtable[sizeof(MonoVTable)+8];
+               MonoVTable* vtable = (MonoVTable*) ALIGN_TO(_vtable, 8);
                gsize bmap;
 
                MonoDomain *domain = mono_get_root_domain ();
@@ -5335,12 +5264,12 @@ sgen_get_array_fill_vtable (void)
                klass.sizes.element_size = 1;
                klass.name = "array_filler_type";
 
-               vtable.klass = &klass;
+               vtable->klass = &klass;
                bmap = 0;
-               vtable.gc_descr = mono_gc_make_descr_for_array (TRUE, &bmap, 0, 1);
-               vtable.rank = 1;
+               vtable->gc_descr = mono_gc_make_descr_for_array (TRUE, &bmap, 0, 1);
+               vtable->rank = 1;
 
-               array_fill_vtable = &vtable;
+               array_fill_vtable = vtable;
        }
        return array_fill_vtable;
 }
@@ -5389,7 +5318,7 @@ void mono_gc_set_skip_thread (gboolean skip)
        UNLOCK_GC;
 }
 
-SgenRemeberedSet*
+SgenRememberedSet*
 sgen_get_remset (void)
 {
        return &remset;
@@ -5409,6 +5338,8 @@ mono_gc_get_vtable_bits (MonoClass *class)
                case GC_BRIDGE_OPAQUE_CLASS:
                        res = SGEN_GC_BIT_BRIDGE_OPAQUE_OBJECT;
                        break;
+               case GC_BRIDGE_TRANSPARENT_CLASS:
+                       break;
                }
        }
        if (fin_callbacks.is_class_finalization_aware) {