[sgen] DTrace probe for when a GC is requested.
[mono.git] mono/metadata/sgen-gc.c
index b72ded2f4d8adc52b14503948ae29ecf23d72531..9382eae9471f7189cc08e4005f775b165a8f2e38 100644 (file)
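The probe macros used throughout this patch (MONO_GC_REQUESTED, the MONO_GC_CHECKPOINT_* markers, MONO_GC_GLOBAL_REMSET_ADD, MONO_GC_FINALIZE_ENQUEUE, MONO_GC_SWEEP_BEGIN/END and the MONO_GC_CONCURRENT_* family) are assumed to come from Mono's DTrace/SystemTap probe description, which is not part of this file. As a rough orientation only (argument lists are read off the call sites below; the guard macro name is hypothetical), a build without tracing support would reduce them to no-ops along these lines:

/* Sketch of a no-tracing fallback for the probes referenced in sgen-gc.c.
 * The real macros are generated from the probe description and provide
 * *_ENABLED() guards for probes whose arguments are expensive to compute. */
#ifndef ENABLE_DTRACE                                        /* hypothetical guard */
#define MONO_GC_REQUESTED(gen, req_size, force)              /* GC requested */
#define MONO_GC_CHECKPOINT_1(gen)                            /* nursery collection */
#define MONO_GC_CHECKPOINT_2(gen)                            /* phase markers, */
#define MONO_GC_CHECKPOINT_3(gen)                            /* see collect_nursery */
#define MONO_GC_CHECKPOINT_4(gen)
#define MONO_GC_CHECKPOINT_5(gen)
#define MONO_GC_CHECKPOINT_6(gen)
#define MONO_GC_CHECKPOINT_7(gen)
#define MONO_GC_CHECKPOINT_8(gen)
#define MONO_GC_CHECKPOINT_9(gen)
#define MONO_GC_GLOBAL_REMSET_ADD(ptr, obj, size, ns, cls)
#define MONO_GC_GLOBAL_REMSET_ADD_ENABLED()                  (0)
#define MONO_GC_FINALIZE_ENQUEUE(obj, size, ns, cls, gen, critical)
#define MONO_GC_FINALIZE_ENQUEUE_ENABLED()                   (0)
#define MONO_GC_SWEEP_BEGIN(gen, full_sweep)
#define MONO_GC_SWEEP_END(gen, full_sweep)
#define MONO_GC_CONCURRENT_START_BEGIN(gen)
#define MONO_GC_CONCURRENT_START_END(gen, num_marked)
#define MONO_GC_CONCURRENT_UPDATE_FINISH_BEGIN(gen, num_marked)
#define MONO_GC_CONCURRENT_UPDATE_END(gen, num_marked)
#define MONO_GC_CONCURRENT_FINISH_END(gen, num_marked)
#endif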
@@ -260,6 +260,10 @@ guint32 collect_before_allocs = 0;
 static gboolean whole_heap_check_before_collection = FALSE;
 /* If set, do a heap consistency check before each minor collection */
 static gboolean consistency_check_at_minor_collection = FALSE;
+/* If set, check whether mark bits are consistent after major collections */
+static gboolean check_mark_bits_after_major_collection = FALSE;
+/* If set, do a few checks when the concurrent collector is used */
+static gboolean do_concurrent_checks = FALSE;
 /* If set, check that there are no references to the domain left at domain unload */
 static gboolean xdomain_checks = FALSE;
 /* If not null, dump the heap after each collection into this file */
@@ -529,13 +533,13 @@ typedef SgenGrayQueue GrayQueue;
 
 /* forward declarations */
 static void scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise, GrayQueue *queue);
-static void scan_from_registered_roots (CopyOrMarkObjectFunc copy_func, ScanObjectFunc scan_func, char *addr_start, char *addr_end, int root_type, GrayQueue *queue);
-static void scan_finalizer_entries (CopyOrMarkObjectFunc copy_func, FinalizeReadyEntry *list, GrayQueue *queue);
+static void scan_from_registered_roots (char *addr_start, char *addr_end, int root_type, ScanCopyContext ctx);
+static void scan_finalizer_entries (FinalizeReadyEntry *list, ScanCopyContext ctx);
 static void report_finalizer_roots (void);
 static void report_registered_roots (void);
 
 static void pin_from_roots (void *start_nursery, void *end_nursery, GrayQueue *queue);
-static int pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery, GrayQueue *queue, ScanObjectFunc scan_func);
+static int pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery, ScanCopyContext ctx);
 static void finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *queue);
 
 void mono_gc_scan_for_specific_ref (MonoObject *key, gboolean precise);
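The refactoring in the declarations above, and throughout the rest of the patch, threads a single ScanCopyContext value through the scanning helpers instead of passing a copy function, a scan function and a gray queue separately. The struct itself lives in the sgen headers and is not part of this diff; judging from the brace initializers used later (for example ScanCopyContext ctx = { scan_func, copy_func, queue };), it presumably looks roughly like this:

/* Presumed shape of ScanCopyContext (defined in the sgen headers, not in this
 * file); the field order is inferred from the initializers in this patch. */
typedef struct {
        ScanObjectFunc scan_func;        /* scans one object, graying the references it holds */
        CopyOrMarkObjectFunc copy_func;  /* copies or marks the object a given slot points to */
        SgenGrayQueue *queue;            /* gray queue both callbacks operate on */
} ScanCopyContext;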
@@ -543,27 +547,20 @@ void mono_gc_scan_for_specific_ref (MonoObject *key, gboolean precise);
 
 static void init_stats (void);
 
-static int mark_ephemerons_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end, GrayQueue *queue);
-static void clear_unreachable_ephemerons (CopyOrMarkObjectFunc copy_func, char *start, char *end, GrayQueue *queue);
+static int mark_ephemerons_in_range (char *start, char *end, ScanCopyContext ctx);
+static void clear_unreachable_ephemerons (char *start, char *end, ScanCopyContext ctx);
 static void null_ephemerons_for_domain (MonoDomain *domain);
 
 SgenObjectOperations current_object_ops;
 SgenMajorCollector major_collector;
 SgenMinorCollector sgen_minor_collector;
 static GrayQueue gray_queue;
+static GrayQueue remember_major_objects_gray_queue;
 
 static SgenRemeberedSet remset;
 
 /* The gray queue to use from the main collection thread. */
-static SgenGrayQueue*
-sgen_workers_get_main_thread_queue (void)
-{
-       if (sgen_collection_is_parallel () || sgen_collection_is_concurrent ())
-               return sgen_workers_get_distribute_gray_queue ();
-       return &gray_queue;
-}
-
-#define WORKERS_DISTRIBUTE_GRAY_QUEUE  (sgen_workers_get_main_thread_queue ())
+#define WORKERS_DISTRIBUTE_GRAY_QUEUE  (&gray_queue)
 
 /*
  * The gray queue a worker job must use.  If we're not parallel or
@@ -575,13 +572,11 @@ sgen_workers_get_job_gray_queue (WorkerData *worker_data)
        return worker_data ? &worker_data->private_gray_queue : WORKERS_DISTRIBUTE_GRAY_QUEUE;
 }
 
-static LOCK_DECLARE (workers_distribute_gray_queue_mutex);
+static gboolean have_non_collection_major_object_remembers = FALSE;
 
 gboolean
 sgen_remember_major_object_for_concurrent_mark (char *obj)
 {
-       gboolean need_lock = current_collection_generation != GENERATION_NURSERY;
-
        if (!major_collector.is_concurrent)
                return FALSE;
 
@@ -590,17 +585,53 @@ sgen_remember_major_object_for_concurrent_mark (char *obj)
        if (!concurrent_collection_in_progress)
                return FALSE;
 
-       if (need_lock)
-               mono_mutex_lock (&workers_distribute_gray_queue_mutex);
+       GRAY_OBJECT_ENQUEUE (&remember_major_objects_gray_queue, obj);
 
-       sgen_gray_object_enqueue (sgen_workers_get_distribute_gray_queue (), obj);
-
-       if (need_lock)
-               mono_mutex_unlock (&workers_distribute_gray_queue_mutex);
+       if (current_collection_generation != GENERATION_NURSERY) {
+               /*
+                * This happens when the mutator allocates large or
+                * pinned objects or when allocating in degraded
+                * mode.
+                */
+               have_non_collection_major_object_remembers = TRUE;
+       }
 
        return TRUE;
 }
 
+static void
+gray_queue_redirect (SgenGrayQueue *queue)
+{
+       gboolean wake = FALSE;
+
+       for (;;) {
+               GrayQueueSection *section = sgen_gray_object_dequeue_section (queue);
+               if (!section)
+                       break;
+               sgen_section_gray_queue_enqueue (queue->alloc_prepare_data, section);
+               wake = TRUE;
+       }
+
+       if (wake) {
+               g_assert (concurrent_collection_in_progress ||
+                               (current_collection_generation == GENERATION_OLD && major_collector.is_parallel));
+               if (sgen_workers_have_started ()) {
+                       sgen_workers_wake_up_all ();
+               } else {
+                       if (concurrent_collection_in_progress)
+                               g_assert (current_collection_generation == -1);
+               }
+       }
+}
+
+static void
+redirect_major_object_remembers (void)
+{
+       gray_queue_redirect (&remember_major_objects_gray_queue);
+       have_non_collection_major_object_remembers = FALSE;
+}
+
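gray_queue_redirect above moves whole gray-queue sections into the section queue stored as the queue's alloc-prepare data (wired up in init_gray_queue further down, where the workers' distribute section queue is passed in) and then wakes the workers. A rough sketch of the consumer side, assuming matching dequeue helpers exist in sgen-gray.h alongside the enqueue helpers visible in this patch (sgen_section_gray_queue_enqueue, sgen_gray_object_dequeue_section); the function below is illustrative, not the actual worker-loop implementation:

/* Illustrative sketch: a woken worker steals one redirected section and
 * feeds it into its private gray queue for draining. */
static gboolean
worker_try_steal_redirected_section (WorkerData *data)
{
        GrayQueueSection *section =
                sgen_section_gray_queue_dequeue (sgen_workers_get_distribute_section_gray_queue ());
        if (!section)
                return FALSE;
        sgen_gray_object_enqueue_section (&data->private_gray_queue, section);
        return TRUE;
}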
 static gboolean
 is_xdomain_ref_allowed (gpointer *ptr, char *obj, MonoDomain *domain)
 {
@@ -1109,9 +1140,15 @@ mono_gc_clear_domain (MonoDomain * domain)
  * lock must be held.  For serial collectors that is not necessary.
  */
 void
-sgen_add_to_global_remset (gpointer ptr)
+sgen_add_to_global_remset (gpointer ptr, gpointer obj)
 {
        remset.record_pointer (ptr);
+
+       if (G_UNLIKELY (MONO_GC_GLOBAL_REMSET_ADD_ENABLED ())) {
+               MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
+               MONO_GC_GLOBAL_REMSET_ADD ((mword)ptr, (mword)obj, sgen_safe_object_get_size (obj),
+                               vt->klass->name_space, vt->klass->name);
+       }
 }
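Callers of sgen_add_to_global_remset now pass, in addition to the slot address, the nursery object that slot points at, so the probe can report the target's class and size (the ephemeron code further down passes key and value accordingly). An illustrative caller in the copy/mark paths, which live in other sgen files and are not shown in this diff, would look roughly like:

/* Illustrative only: record a slot outside the nursery that points into it,
 * reporting both the slot and the nursery object it references. */
if (sgen_ptr_in_nursery (*ptr) && !sgen_ptr_in_nursery (ptr))
        sgen_add_to_global_remset (ptr, *ptr);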
 
 /*
@@ -1122,9 +1159,11 @@ sgen_add_to_global_remset (gpointer ptr)
  * usage.
  */
 gboolean
-sgen_drain_gray_stack (GrayQueue *queue, ScanObjectFunc scan_func, int max_objs)
+sgen_drain_gray_stack (int max_objs, ScanCopyContext ctx)
 {
        char *obj;
+       ScanObjectFunc scan_func = ctx.scan_func;
+       GrayQueue *queue = ctx.queue;
 
        if (max_objs == -1) {
                for (;;) {
@@ -1158,7 +1197,7 @@ sgen_drain_gray_stack (GrayQueue *queue, ScanObjectFunc scan_func, int max_objs)
  * pinned objects.  Return the number of pinned objects.
  */
 static int
-pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery, GrayQueue *queue, ScanObjectFunc scan_func)
+pin_objects_from_addresses (GCMemSection *section, void **start, void **end, void *start_nursery, void *end_nursery, ScanCopyContext ctx)
 {
        void *last = NULL;
        int count = 0;
@@ -1168,6 +1207,8 @@ pin_objects_from_addresses (GCMemSection *section, void **start, void **end, voi
        void *addr;
        int idx;
        void **definitely_pinned = start;
+       ScanObjectFunc scan_func = ctx.scan_func;
+       SgenGrayQueue *queue = ctx.queue;
 
        sgen_nursery_allocator_prepare_for_pinning ();
 
@@ -1264,14 +1305,14 @@ pin_objects_from_addresses (GCMemSection *section, void **start, void **end, voi
 }
 
 void
-sgen_pin_objects_in_section (GCMemSection *section, GrayQueue *queue, ScanObjectFunc scan_func)
+sgen_pin_objects_in_section (GCMemSection *section, ScanCopyContext ctx)
 {
        int num_entries = section->pin_queue_num_entries;
        if (num_entries) {
                void **start = section->pin_queue_start;
                int reduced_to;
                reduced_to = pin_objects_from_addresses (section, start, start + num_entries,
-                               section->data, section->next_data, queue, scan_func);
+                               section->data, section->next_data, ctx);
                section->pin_queue_num_entries = reduced_to;
                if (!reduced_to)
                        section->pin_queue_start = NULL;
@@ -1464,6 +1505,18 @@ pin_from_roots (void *start_nursery, void *end_nursery, GrayQueue *queue)
        scan_thread_data (start_nursery, end_nursery, FALSE, queue);
 }
 
+static void
+unpin_objects_from_queue (SgenGrayQueue *queue)
+{
+       for (;;) {
+               char *addr;
+               GRAY_OBJECT_DEQUEUE (queue, addr);
+               if (!addr)
+                       break;
+               SGEN_UNPIN_OBJECT (addr);
+       }
+}
+
 typedef struct {
        CopyOrMarkObjectFunc func;
        GrayQueue *queue;
@@ -1500,8 +1553,11 @@ single_arg_user_copy_or_mark (void **obj)
  * This function is not thread-safe!
  */
 static void
-precisely_scan_objects_from (CopyOrMarkObjectFunc copy_func, ScanObjectFunc scan_func, void** start_root, void** end_root, char* n_start, char *n_end, mword desc, GrayQueue *queue)
+precisely_scan_objects_from (void** start_root, void** end_root, char* n_start, char *n_end, mword desc, ScanCopyContext ctx)
 {
+       CopyOrMarkObjectFunc copy_func = ctx.copy_func;
+       SgenGrayQueue *queue = ctx.queue;
+
        switch (desc & ROOT_DESC_TYPE_MASK) {
        case ROOT_DESC_BITMAP:
                desc >>= ROOT_DESC_TYPE_SHIFT;
@@ -1509,7 +1565,7 @@ precisely_scan_objects_from (CopyOrMarkObjectFunc copy_func, ScanObjectFunc scan
                        if ((desc & 1) && *start_root) {
                                copy_func (start_root, queue);
                                SGEN_LOG (9, "Overwrote root at %p with %p", start_root, *start_root);
-                               sgen_drain_gray_stack (queue, scan_func, -1);
+                               sgen_drain_gray_stack (-1, ctx);
                        }
                        desc >>= 1;
                        start_root++;
@@ -1527,7 +1583,7 @@ precisely_scan_objects_from (CopyOrMarkObjectFunc copy_func, ScanObjectFunc scan
                                if ((bmap & 1) && *objptr) {
                                        copy_func (objptr, queue);
                                        SGEN_LOG (9, "Overwrote root at %p with %p", objptr, *objptr);
-                                       sgen_drain_gray_stack (queue, scan_func, -1);
+                                       sgen_drain_gray_stack (-1, ctx);
                                }
                                bmap >>= 1;
                                ++objptr;
@@ -1759,8 +1815,10 @@ report_registered_roots (void)
 }
 
 static void
-scan_finalizer_entries (CopyOrMarkObjectFunc copy_func, FinalizeReadyEntry *list, GrayQueue *queue)
+scan_finalizer_entries (FinalizeReadyEntry *list, ScanCopyContext ctx)
 {
+       CopyOrMarkObjectFunc copy_func = ctx.copy_func;
+       SgenGrayQueue *queue = ctx.queue;
        FinalizeReadyEntry *fin;
 
        for (fin = list; fin; fin = fin->next) {
@@ -1801,6 +1859,7 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *
        int done_with_ephemerons, ephemeron_rounds = 0;
        CopyOrMarkObjectFunc copy_func = current_object_ops.copy_or_mark_object;
        ScanObjectFunc scan_func = current_object_ops.scan_object;
+       ScanCopyContext ctx = { scan_func, copy_func, queue };
 
        /*
         * We copied all the reachable objects. Now it's the time to copy
@@ -1815,7 +1874,7 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *
         *   To achieve better cache locality and cache usage, we drain the gray stack 
         * frequently, after each object is copied, and just finish the work here.
         */
-       sgen_drain_gray_stack (queue, scan_func, -1);
+       sgen_drain_gray_stack (-1, ctx);
        TV_GETTIME (atv);
        SGEN_LOG (2, "%s generation done", generation_name (generation));
 
@@ -1835,34 +1894,34 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *
         */
        done_with_ephemerons = 0;
        do {
-               done_with_ephemerons = mark_ephemerons_in_range (copy_func, start_addr, end_addr, queue);
-               sgen_drain_gray_stack (queue, scan_func, -1);
+               done_with_ephemerons = mark_ephemerons_in_range (start_addr, end_addr, ctx);
+               sgen_drain_gray_stack (-1, ctx);
                ++ephemeron_rounds;
        } while (!done_with_ephemerons);
 
-       sgen_scan_togglerefs (copy_func, start_addr, end_addr, queue);
+       sgen_scan_togglerefs (start_addr, end_addr, ctx);
        if (generation == GENERATION_OLD)
-               sgen_scan_togglerefs (copy_func, sgen_get_nursery_start (), sgen_get_nursery_end (), queue);
+               sgen_scan_togglerefs (sgen_get_nursery_start (), sgen_get_nursery_end (), ctx);
 
        if (sgen_need_bridge_processing ()) {
-               sgen_collect_bridge_objects (copy_func, start_addr, end_addr, generation, queue);
+               sgen_collect_bridge_objects (start_addr, end_addr, generation, ctx);
                if (generation == GENERATION_OLD)
-                       sgen_collect_bridge_objects (copy_func, sgen_get_nursery_start (), sgen_get_nursery_end (), GENERATION_NURSERY, queue);
+                       sgen_collect_bridge_objects (sgen_get_nursery_start (), sgen_get_nursery_end (), GENERATION_NURSERY, ctx);
        }
 
        /*
        Make sure we drain the gray stack before processing disappearing links and finalizers.
        If we don't make sure it is empty we might wrongly see a live object as dead.
        */
-       sgen_drain_gray_stack (queue, scan_func, -1);
+       sgen_drain_gray_stack (-1, ctx);
 
        /*
        We must clear weak links that don't track resurrection before processing object ready for
        finalization so they can be cleared before that.
        */
-       sgen_null_link_in_range (copy_func, start_addr, end_addr, generation, TRUE, queue);
+       sgen_null_link_in_range (start_addr, end_addr, generation, TRUE, ctx);
        if (generation == GENERATION_OLD)
-               sgen_null_link_in_range (copy_func, start_addr, end_addr, GENERATION_NURSERY, TRUE, queue);
+               sgen_null_link_in_range (start_addr, end_addr, GENERATION_NURSERY, TRUE, ctx);
 
 
        /* walk the finalization queue and move also the objects that need to be
@@ -1870,20 +1929,20 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *
         * on are also not reclaimed. As with the roots above, only objects in the nursery
         * are marked/copied.
         */
-       sgen_finalize_in_range (copy_func, start_addr, end_addr, generation, queue);
+       sgen_finalize_in_range (start_addr, end_addr, generation, ctx);
        if (generation == GENERATION_OLD)
-               sgen_finalize_in_range (copy_func, sgen_get_nursery_start (), sgen_get_nursery_end (), GENERATION_NURSERY, queue);
+               sgen_finalize_in_range (sgen_get_nursery_start (), sgen_get_nursery_end (), GENERATION_NURSERY, ctx);
        /* drain the new stack that might have been created */
        SGEN_LOG (6, "Precise scan of gray area post fin");
-       sgen_drain_gray_stack (queue, scan_func, -1);
+       sgen_drain_gray_stack (-1, ctx);
 
        /*
         * This must be done again after processing finalizable objects since CWL slots are cleared only after the key is finalized.
         */
        done_with_ephemerons = 0;
        do {
-               done_with_ephemerons = mark_ephemerons_in_range (copy_func, start_addr, end_addr, queue);
-               sgen_drain_gray_stack (queue, scan_func, -1);
+               done_with_ephemerons = mark_ephemerons_in_range (start_addr, end_addr, ctx);
+               sgen_drain_gray_stack (-1, ctx);
                ++ephemeron_rounds;
        } while (!done_with_ephemerons);
 
@@ -1891,7 +1950,7 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *
         * Clear ephemeron pairs with unreachable keys.
         * We pass the copy func so we can figure out if an array was promoted or not.
         */
-       clear_unreachable_ephemerons (copy_func, start_addr, end_addr, queue);
+       clear_unreachable_ephemerons (start_addr, end_addr, ctx);
 
        TV_GETTIME (btv);
        SGEN_LOG (2, "Finalize queue handling scan for %s generation: %d usecs %d ephemeron rounds", generation_name (generation), TV_ELAPSED (atv, btv), ephemeron_rounds);
@@ -1906,12 +1965,12 @@ finish_gray_stack (char *start_addr, char *end_addr, int generation, GrayQueue *
         */
        g_assert (sgen_gray_object_queue_is_empty (queue));
        for (;;) {
-               sgen_null_link_in_range (copy_func, start_addr, end_addr, generation, FALSE, queue);
+               sgen_null_link_in_range (start_addr, end_addr, generation, FALSE, ctx);
                if (generation == GENERATION_OLD)
-                       sgen_null_link_in_range (copy_func, start_addr, end_addr, GENERATION_NURSERY, FALSE, queue);
+                       sgen_null_link_in_range (start_addr, end_addr, GENERATION_NURSERY, FALSE, ctx);
                if (sgen_gray_object_queue_is_empty (queue))
                        break;
-               sgen_drain_gray_stack (queue, scan_func, -1);
+               sgen_drain_gray_stack (-1, ctx);
        }
 
        g_assert (sgen_gray_object_queue_is_empty (queue));
@@ -1939,13 +1998,13 @@ check_scan_starts (void)
 }
 
 static void
-scan_from_registered_roots (CopyOrMarkObjectFunc copy_func, ScanObjectFunc scan_func, char *addr_start, char *addr_end, int root_type, GrayQueue *queue)
+scan_from_registered_roots (char *addr_start, char *addr_end, int root_type, ScanCopyContext ctx)
 {
        void **start_root;
        RootRecord *root;
        SGEN_HASH_TABLE_FOREACH (&roots_hash [root_type], start_root, root) {
                SGEN_LOG (6, "Precise root scan %p-%p (desc: %p)", start_root, root->end_root, (void*)root->root_desc);
-               precisely_scan_objects_from (copy_func, scan_func, start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc, queue);
+               precisely_scan_objects_from (start_root, (void**)root->end_root, addr_start, addr_end, root->root_desc, ctx);
        } SGEN_HASH_TABLE_FOREACH_END;
 }
 
@@ -2230,11 +2289,10 @@ static void
 job_scan_from_registered_roots (WorkerData *worker_data, void *job_data_untyped)
 {
        ScanFromRegisteredRootsJobData *job_data = job_data_untyped;
+       ScanCopyContext ctx = { job_data->scan_func, job_data->copy_or_mark_func,
+               sgen_workers_get_job_gray_queue (worker_data) };
 
-       scan_from_registered_roots (job_data->copy_or_mark_func, job_data->scan_func,
-                       job_data->heap_start, job_data->heap_end,
-                       job_data->root_type,
-                       sgen_workers_get_job_gray_queue (worker_data));
+       scan_from_registered_roots (job_data->heap_start, job_data->heap_end, job_data->root_type, ctx);
        sgen_free_internal_dynamic (job_data, sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA);
 }
 
@@ -2263,10 +2321,9 @@ static void
 job_scan_finalizer_entries (WorkerData *worker_data, void *job_data_untyped)
 {
        ScanFinalizerEntriesJobData *job_data = job_data_untyped;
+       ScanCopyContext ctx = { NULL, current_object_ops.copy_or_mark_object, sgen_workers_get_job_gray_queue (worker_data) };
 
-       scan_finalizer_entries (current_object_ops.copy_or_mark_object,
-                       job_data->list,
-                       sgen_workers_get_job_gray_queue (worker_data));
+       scan_finalizer_entries (job_data->list, ctx);
        sgen_free_internal_dynamic (job_data, sizeof (ScanFinalizerEntriesJobData), INTERNAL_MEM_WORKER_JOB_DATA);
 }
 
@@ -2373,10 +2430,18 @@ static void
 init_gray_queue (void)
 {
        if (sgen_collection_is_parallel () || sgen_collection_is_concurrent ()) {
-               sgen_gray_object_queue_init_invalid (&gray_queue);
                sgen_workers_init_distribute_gray_queue ();
+               sgen_gray_object_queue_init_with_alloc_prepare (&gray_queue, NULL,
+                               gray_queue_redirect, sgen_workers_get_distribute_section_gray_queue ());
+       } else {
+               sgen_gray_object_queue_init (&gray_queue, NULL);
+       }
+
+       if (major_collector.is_concurrent) {
+               sgen_gray_object_queue_init_with_alloc_prepare (&remember_major_objects_gray_queue, NULL,
+                               gray_queue_redirect, sgen_workers_get_distribute_section_gray_queue ());
        } else {
-               sgen_gray_object_queue_init (&gray_queue);
+               sgen_gray_object_queue_init_invalid (&remember_major_objects_gray_queue);
        }
 }
 
@@ -2385,7 +2450,7 @@ init_gray_queue (void)
  * collection.
  */
 static gboolean
-collect_nursery (void)
+collect_nursery (SgenGrayQueue *unpin_queue)
 {
        gboolean needs_major;
        size_t max_garbage_amount;
@@ -2395,6 +2460,7 @@ collect_nursery (void)
        ScanFinalizerEntriesJobData *sfejd_fin_ready, *sfejd_critical_fin;
        ScanThreadDataJobData *stdjd;
        mword fragment_total;
+       ScanCopyContext ctx;
        TV_DECLARE (all_atv);
        TV_DECLARE (all_btv);
        TV_DECLARE (atv);
@@ -2460,9 +2526,13 @@ collect_nursery (void)
        if (remset.prepare_for_minor_collection)
                remset.prepare_for_minor_collection ();
 
+       MONO_GC_CHECKPOINT_1 (GENERATION_NURSERY);
+
        sgen_process_fin_stage_entries ();
        sgen_process_dislink_stage_entries ();
 
+       MONO_GC_CHECKPOINT_2 (GENERATION_NURSERY);
+
        /* pin from pinned handles */
        sgen_init_pinning ();
        mono_profiler_gc_event (MONO_GC_EVENT_MARK_START, 0);
@@ -2470,7 +2540,10 @@ collect_nursery (void)
        /* identify pinned objects */
        sgen_optimize_pin_queue (0);
        sgen_pinning_setup_section (nursery_section);
-       sgen_pin_objects_in_section (nursery_section, WORKERS_DISTRIBUTE_GRAY_QUEUE, NULL);
+       ctx.scan_func = NULL;
+       ctx.copy_func = NULL;
+       ctx.queue = WORKERS_DISTRIBUTE_GRAY_QUEUE;
+       sgen_pin_objects_in_section (nursery_section, ctx);
        sgen_pinning_trim_queue_to_section (nursery_section);
 
        TV_GETTIME (atv);
@@ -2478,6 +2551,8 @@ collect_nursery (void)
        SGEN_LOG (2, "Finding pinned pointers: %d in %d usecs", sgen_get_pinned_count (), TV_ELAPSED (btv, atv));
        SGEN_LOG (4, "Start scan with %d pinned objects", sgen_get_pinned_count ());
 
+       MONO_GC_CHECKPOINT_3 (GENERATION_NURSERY);
+
        if (whole_heap_check_before_collection) {
                sgen_clear_nursery_fragments ();
                sgen_check_whole_heap ();
@@ -2506,8 +2581,14 @@ collect_nursery (void)
        time_minor_scan_remsets += TV_ELAPSED (atv, btv);
        SGEN_LOG (2, "Old generation scan: %d usecs", TV_ELAPSED (atv, btv));
 
-       if (!sgen_collection_is_parallel ())
-               sgen_drain_gray_stack (&gray_queue, current_object_ops.scan_object, -1);
+       MONO_GC_CHECKPOINT_4 (GENERATION_NURSERY);
+
+       if (!sgen_collection_is_parallel ()) {
+               ctx.scan_func = current_object_ops.scan_object;
+               ctx.copy_func = NULL;
+               ctx.queue = &gray_queue;
+               sgen_drain_gray_stack (-1, ctx);
+       }
 
        if (mono_profiler_get_events () & MONO_PROFILE_GC_ROOTS)
                report_registered_roots ();
@@ -2516,6 +2597,8 @@ collect_nursery (void)
        TV_GETTIME (atv);
        time_minor_scan_pinned += TV_ELAPSED (btv, atv);
 
+       MONO_GC_CHECKPOINT_5 (GENERATION_NURSERY);
+
        /* registered roots, this includes static fields */
        scrrjd_normal = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
        scrrjd_normal->copy_or_mark_func = current_object_ops.copy_or_mark_object;
@@ -2536,6 +2619,8 @@ collect_nursery (void)
        TV_GETTIME (btv);
        time_minor_scan_registered_roots += TV_ELAPSED (atv, btv);
 
+       MONO_GC_CHECKPOINT_6 (GENERATION_NURSERY);
+
        /* thread data */
        stdjd = sgen_alloc_internal_dynamic (sizeof (ScanThreadDataJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
        stdjd->heap_start = sgen_get_nursery_start ();
@@ -2546,13 +2631,9 @@ collect_nursery (void)
        time_minor_scan_thread_data += TV_ELAPSED (btv, atv);
        btv = atv;
 
-       if (sgen_collection_is_parallel () || sgen_collection_is_concurrent ()) {
-               while (!sgen_gray_object_queue_is_empty (WORKERS_DISTRIBUTE_GRAY_QUEUE)) {
-                       sgen_workers_distribute_gray_queue_sections ();
-                       g_usleep (1000);
-               }
-       }
-       sgen_workers_join ();
+       MONO_GC_CHECKPOINT_7 (GENERATION_NURSERY);
+
+       g_assert (!sgen_collection_is_parallel () && !sgen_collection_is_concurrent ());
 
        if (sgen_collection_is_parallel () || sgen_collection_is_concurrent ())
                g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
@@ -2566,11 +2647,15 @@ collect_nursery (void)
        sfejd_critical_fin->list = critical_fin_list;
        sgen_workers_enqueue_job (job_scan_finalizer_entries, sfejd_critical_fin);
 
+       MONO_GC_CHECKPOINT_8 (GENERATION_NURSERY);
+
        finish_gray_stack (sgen_get_nursery_start (), nursery_next, GENERATION_NURSERY, &gray_queue);
        TV_GETTIME (atv);
        time_minor_finish_gray_stack += TV_ELAPSED (btv, atv);
        mono_profiler_gc_event (MONO_GC_EVENT_MARK_END, 0);
 
+       MONO_GC_CHECKPOINT_9 (GENERATION_NURSERY);
+
        /*
         * The (single-threaded) finalization code might have done
         * some copying/marking so we can only reset the GC thread's
@@ -2589,7 +2674,9 @@ collect_nursery (void)
         * next allocations.
         */
        mono_profiler_gc_event (MONO_GC_EVENT_RECLAIM_START, 0);
-       fragment_total = sgen_build_nursery_fragments (nursery_section, nursery_section->pin_queue_start, nursery_section->pin_queue_num_entries);
+       fragment_total = sgen_build_nursery_fragments (nursery_section,
+                       nursery_section->pin_queue_start, nursery_section->pin_queue_num_entries,
+                       unpin_queue);
        if (!fragment_total)
                degraded_mode = 1;
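For reference, the nine MONO_GC_CHECKPOINT_* probes added to collect_nursery above bracket the following phases of a minor collection (read off the surrounding code; the probe declarations themselves are not part of this diff):

/*
 * CHECKPOINT_1  after preparing the remembered set, before processing
 *               finalizer/dislink stage entries
 * CHECKPOINT_2  before pinning (pin from pinned handles, init pinning)
 * CHECKPOINT_3  after the pin queue has been set up for the nursery section
 * CHECKPOINT_4  after scanning the remembered set (old generation scan)
 * CHECKPOINT_5  before scanning registered roots
 * CHECKPOINT_6  after registered roots, before scanning thread data
 * CHECKPOINT_7  after scanning thread data
 * CHECKPOINT_8  before finishing the gray stack
 * CHECKPOINT_9  after the gray stack has been finished (mark end)
 */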
 
@@ -2658,12 +2745,14 @@ major_copy_or_mark_from_roots (int *old_next_pin_slot, gboolean finish_up_concur
        ScanFromRegisteredRootsJobData *scrrjd_normal, *scrrjd_wbarrier;
        ScanThreadDataJobData *stdjd;
        ScanFinalizerEntriesJobData *sfejd_fin_ready, *sfejd_critical_fin;
+       ScanCopyContext ctx;
 
        if (major_collector.is_concurrent) {
                /*This cleans up unused fragments */
                sgen_nursery_allocator_prepare_for_pinning ();
 
-               check_nursery_is_clean ();
+               if (do_concurrent_checks)
+                       check_nursery_is_clean ();
        } else {
                /* The concurrent collector doesn't touch the nursery. */
                sgen_nursery_alloc_prepare_for_major ();
@@ -2710,6 +2799,17 @@ major_copy_or_mark_from_roots (int *old_next_pin_slot, gboolean finish_up_concur
        pin_from_roots ((void*)lowest_heap_address, (void*)highest_heap_address, WORKERS_DISTRIBUTE_GRAY_QUEUE);
        sgen_optimize_pin_queue (0);
 
+       /*
+        * The concurrent collector doesn't move objects, either on
+        * the major heap or in the nursery, so we can mark even
+        * before pinning has finished.  For the non-concurrent
+        * collector we start the workers after pinning.
+        */
+       if (major_collector.is_concurrent) {
+               sgen_workers_start_all_workers ();
+               sgen_workers_start_marking ();
+       }
+
        /*
         * pin_queue now contains all candidate pointers, sorted and
         * uniqued.  We must do two passes now to figure out which
@@ -2754,8 +2854,10 @@ major_copy_or_mark_from_roots (int *old_next_pin_slot, gboolean finish_up_concur
        if (profile_roots)
                notify_gc_roots (&root_report);
        /* second pass for the sections */
-       sgen_pin_objects_in_section (nursery_section, WORKERS_DISTRIBUTE_GRAY_QUEUE,
-                       concurrent_collection_in_progress ? current_object_ops.scan_object : NULL);
+       ctx.scan_func = concurrent_collection_in_progress ? current_object_ops.scan_object : NULL;
+       ctx.copy_func = NULL;
+       ctx.queue = WORKERS_DISTRIBUTE_GRAY_QUEUE;
+       sgen_pin_objects_in_section (nursery_section, ctx);
        major_collector.pin_objects (WORKERS_DISTRIBUTE_GRAY_QUEUE);
        if (old_next_pin_slot)
                *old_next_pin_slot = sgen_get_pinned_count ();
@@ -2771,8 +2873,10 @@ major_copy_or_mark_from_roots (int *old_next_pin_slot, gboolean finish_up_concur
        main_gc_thread = mono_native_thread_self ();
 #endif
 
-       sgen_workers_start_all_workers ();
-       sgen_workers_start_marking ();
+       if (!major_collector.is_concurrent) {
+               sgen_workers_start_all_workers ();
+               sgen_workers_start_marking ();
+       }
 
        if (mono_profiler_get_events () & MONO_PROFILE_GC_ROOTS)
                report_registered_roots ();
@@ -2844,7 +2948,8 @@ major_copy_or_mark_from_roots (int *old_next_pin_slot, gboolean finish_up_concur
 
                sgen_pin_stats_reset ();
 
-               check_nursery_is_clean ();
+               if (do_concurrent_checks)
+                       check_nursery_is_clean ();
        }
 }
 
@@ -2859,6 +2964,8 @@ major_start_collection (int *old_next_pin_slot)
        mono_perfcounters->gc_collections1++;
 #endif
 
+       g_assert (sgen_section_gray_queue_is_empty (sgen_workers_get_distribute_section_gray_queue ()));
+
        if (major_collector.is_concurrent)
                concurrent_collection_in_progress = TRUE;
 
@@ -2887,16 +2994,14 @@ major_start_collection (int *old_next_pin_slot)
 static void
 wait_for_workers_to_finish (void)
 {
+       g_assert (sgen_gray_object_queue_is_empty (&remember_major_objects_gray_queue));
+
        if (major_collector.is_parallel || major_collector.is_concurrent) {
-               while (!sgen_gray_object_queue_is_empty (WORKERS_DISTRIBUTE_GRAY_QUEUE)) {
-                       sgen_workers_distribute_gray_queue_sections ();
-                       g_usleep (1000);
-               }
+               gray_queue_redirect (&gray_queue);
+               sgen_workers_join ();
        }
-       sgen_workers_join ();
 
-       if (major_collector.is_parallel || major_collector.is_concurrent)
-               g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
+       g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
 
 #ifdef SGEN_DEBUG_INTERNAL_ALLOC
        main_gc_thread = NULL;
@@ -2914,7 +3019,8 @@ major_finish_collection (const char *reason, int old_next_pin_slot, gboolean sca
 
        TV_GETTIME (btv);
 
-       wait_for_workers_to_finish ();
+       if (major_collector.is_concurrent || major_collector.is_parallel)
+               wait_for_workers_to_finish ();
 
        current_object_ops = major_collector.major_ops;
 
@@ -2922,9 +3028,20 @@ major_finish_collection (const char *reason, int old_next_pin_slot, gboolean sca
                major_copy_or_mark_from_roots (NULL, TRUE, scan_mod_union);
                wait_for_workers_to_finish ();
 
-               check_nursery_is_clean ();
+               g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
+
+               if (do_concurrent_checks)
+                       check_nursery_is_clean ();
        }
 
+       /*
+        * The workers have stopped so we need to finish gray queue
+        * work that might result from finalization in the main GC
+        * thread.  Redirection must therefore be turned off.
+        */
+       sgen_gray_object_queue_disable_alloc_prepare (&gray_queue);
+       g_assert (sgen_section_gray_queue_is_empty (sgen_workers_get_distribute_section_gray_queue ()));
+
        /* all the objects in the heap */
        finish_gray_stack (heap_start, heap_end, GENERATION_OLD, &gray_queue);
        TV_GETTIME (atv);
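sgen_gray_object_queue_disable_alloc_prepare, called a few lines above, is defined in sgen-gray.c and not shown here; given the comment, it presumably just drops the redirect hook so that gray-queue work produced by finalization stays on the main GC thread. A sketch of the assumed behaviour (field names partly guessed; only alloc_prepare_data is visible in this patch):

/* Assumed behaviour, for orientation only; the real definition lives in
 * sgen-gray.c. */
static void
sgen_gray_object_queue_disable_alloc_prepare (SgenGrayQueue *queue)
{
        queue->alloc_prepare_func = NULL;   /* stop redirecting new sections */
        queue->alloc_prepare_data = NULL;
}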
@@ -2951,6 +3068,11 @@ major_finish_collection (const char *reason, int old_next_pin_slot, gboolean sca
        reset_heap_boundaries ();
        sgen_update_heap_boundaries ((mword)sgen_get_nursery_start (), (mword)sgen_get_nursery_end ());
 
+       if (check_mark_bits_after_major_collection)
+               sgen_check_major_heap_marked ();
+
+       MONO_GC_SWEEP_BEGIN (GENERATION_OLD, !major_collector.sweeps_lazily);
+
        /* sweep the big objects list */
        prevbo = NULL;
        for (bigobj = los_object_list; bigobj;) {
@@ -2984,6 +3106,8 @@ major_finish_collection (const char *reason, int old_next_pin_slot, gboolean sca
 
        major_collector.sweep ();
 
+       MONO_GC_SWEEP_END (GENERATION_OLD, !major_collector.sweeps_lazily);
+
        TV_GETTIME (btv);
        time_major_sweep += TV_ELAPSED (atv, btv);
 
@@ -2992,7 +3116,7 @@ major_finish_collection (const char *reason, int old_next_pin_slot, gboolean sca
                 * pinned objects as we go, memzero() the empty fragments so they are ready for the
                 * next allocations.
                 */
-               if (!sgen_build_nursery_fragments (nursery_section, nursery_section->pin_queue_start, nursery_section->pin_queue_num_entries))
+               if (!sgen_build_nursery_fragments (nursery_section, nursery_section->pin_queue_start, nursery_section->pin_queue_num_entries, NULL))
                        degraded_mode = 1;
 
                /* prepare the pin queue for the next collection */
@@ -3022,6 +3146,8 @@ major_finish_collection (const char *reason, int old_next_pin_slot, gboolean sca
 
        major_collector.finish_major_collection ();
 
+       g_assert (sgen_section_gray_queue_is_empty (sgen_workers_get_distribute_section_gray_queue ()));
+
        if (major_collector.is_concurrent)
                concurrent_collection_in_progress = FALSE;
 
@@ -3042,6 +3168,11 @@ major_do_collection (const char *reason)
        TV_DECLARE (all_btv);
        int old_next_pin_slot;
 
+       if (major_collector.get_and_reset_num_major_objects_marked) {
+               long long num_marked = major_collector.get_and_reset_num_major_objects_marked ();
+               g_assert (!num_marked);
+       }
+
        /* world must be stopped already */
        TV_GETTIME (all_atv);
 
@@ -3051,6 +3182,10 @@ major_do_collection (const char *reason)
        TV_GETTIME (all_btv);
        gc_stats.major_gc_time_usecs += TV_ELAPSED (all_atv, all_btv);
 
+       /* FIXME: also report this to the user, preferably in gc-end. */
+       if (major_collector.get_and_reset_num_major_objects_marked)
+               major_collector.get_and_reset_num_major_objects_marked ();
+
        return bytes_pinned_from_failed_allocation > 0;
 }
 
@@ -3059,32 +3194,55 @@ static gboolean major_do_collection (const char *reason);
 static void
 major_start_concurrent_collection (const char *reason)
 {
+       long long num_objects_marked = major_collector.get_and_reset_num_major_objects_marked ();
+
+       g_assert (num_objects_marked == 0);
+
+       MONO_GC_CONCURRENT_START_BEGIN (GENERATION_OLD);
+
        // FIXME: store reason and pass it when finishing
        major_start_collection (NULL);
 
-       sgen_workers_distribute_gray_queue_sections ();
-       g_assert (sgen_gray_object_queue_is_empty (sgen_workers_get_distribute_gray_queue ()));
-
+       gray_queue_redirect (&gray_queue);
        sgen_workers_wait_for_jobs ();
 
+       num_objects_marked = major_collector.get_and_reset_num_major_objects_marked ();
+       MONO_GC_CONCURRENT_START_END (GENERATION_OLD, num_objects_marked);
+
        current_collection_generation = -1;
 }
 
 static gboolean
-major_update_or_finish_concurrent_collection (void)
+major_update_or_finish_concurrent_collection (gboolean force_finish)
 {
+       SgenGrayQueue unpin_queue;
+       memset (&unpin_queue, 0, sizeof (unpin_queue));
+
+       MONO_GC_CONCURRENT_UPDATE_FINISH_BEGIN (GENERATION_OLD, major_collector.get_and_reset_num_major_objects_marked ());
+
+       g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
+       if (!have_non_collection_major_object_remembers)
+               g_assert (sgen_gray_object_queue_is_empty (&remember_major_objects_gray_queue));
+
        major_collector.update_cardtable_mod_union ();
        sgen_los_update_cardtable_mod_union ();
 
-       if (!sgen_workers_all_done ()) {
-               g_print ("workers not done\n");
+       if (!force_finish && !sgen_workers_all_done ()) {
+               MONO_GC_CONCURRENT_UPDATE_END (GENERATION_OLD, major_collector.get_and_reset_num_major_objects_marked ());
                return FALSE;
        }
 
-       collect_nursery ();
+       collect_nursery (&unpin_queue);
+       redirect_major_object_remembers ();
 
        current_collection_generation = GENERATION_OLD;
        major_finish_collection ("finishing", -1, TRUE);
+
+       unpin_objects_from_queue (&unpin_queue);
+       sgen_gray_object_queue_deinit (&unpin_queue);
+
+       MONO_GC_CONCURRENT_FINISH_END (GENERATION_OLD, major_collector.get_and_reset_num_major_objects_marked ());
+
        current_collection_generation = -1;
 
        if (whole_heap_check_before_collection)
@@ -3125,9 +3283,16 @@ sgen_ensure_free_space (size_t size)
                }
        }
 
+       if (generation_to_collect == -1) {
+               if (concurrent_collection_in_progress && sgen_workers_all_done ()) {
+                       generation_to_collect = GENERATION_OLD;
+                       reason = "Finish concurrent collection";
+               }
+       }
+
        if (generation_to_collect == -1)
                return;
-       sgen_perform_collection (size, generation_to_collect, reason, generation_to_collect == GENERATION_NURSERY);
+       sgen_perform_collection (size, generation_to_collect, reason, FALSE);
 }
 
 void
@@ -3136,8 +3301,18 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
        TV_DECLARE (gc_end);
        GGTimingInfo infos [2];
        int overflow_generation_to_collect = -1;
+       int oldest_generation_collected = generation_to_collect;
        const char *overflow_reason = NULL;
 
+       MONO_GC_REQUESTED (generation_to_collect, requested_size, wait_to_finish ? 1 : 0);
+
+       g_assert (generation_to_collect == GENERATION_NURSERY || generation_to_collect == GENERATION_OLD);
+
+       if (have_non_collection_major_object_remembers) {
+               g_assert (concurrent_collection_in_progress);
+               redirect_major_object_remembers ();
+       }
+
        memset (infos, 0, sizeof (infos));
        mono_profiler_gc_event (MONO_GC_EVENT_START, generation_to_collect);
 
@@ -3150,32 +3325,38 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
        sgen_stop_world (generation_to_collect);
 
        if (concurrent_collection_in_progress) {
-               g_assert (generation_to_collect == GENERATION_NURSERY);
-               g_print ("finishing concurrent collection\n");
-               if (major_update_or_finish_concurrent_collection ())
+               if (major_update_or_finish_concurrent_collection (wait_to_finish && generation_to_collect == GENERATION_OLD)) {
+                       oldest_generation_collected = GENERATION_OLD;
+                       goto done;
+               }
+               if (generation_to_collect == GENERATION_OLD)
                        goto done;
        }
 
        //FIXME extract overflow reason
        if (generation_to_collect == GENERATION_NURSERY) {
-               if (collect_nursery ()) {
+               if (collect_nursery (NULL)) {
                        overflow_generation_to_collect = GENERATION_OLD;
                        overflow_reason = "Minor overflow";
                }
                if (concurrent_collection_in_progress) {
-                       current_collection_generation = GENERATION_OLD;
-                       // FIXME: we need to do the distribution in the background
-                       for (;;) {
-                               sgen_workers_distribute_gray_queue_sections ();
-                               if (sgen_gray_object_queue_is_empty (sgen_workers_get_distribute_gray_queue ()))
-                                       break;
-                               g_usleep (1000);
-                       }
-                       current_collection_generation = -1;
+                       redirect_major_object_remembers ();
+                       sgen_workers_wake_up_all ();
                }
        } else {
-               if (major_collector.is_concurrent)
-                       collect_nursery ();
+               SgenGrayQueue unpin_queue;
+               SgenGrayQueue *unpin_queue_ptr;
+               memset (&unpin_queue, 0, sizeof (unpin_queue));
+
+               if (major_collector.is_concurrent && wait_to_finish)
+                       unpin_queue_ptr = &unpin_queue;
+               else
+                       unpin_queue_ptr = NULL;
+
+               if (major_collector.is_concurrent) {
+                       g_assert (!concurrent_collection_in_progress);
+                       collect_nursery (unpin_queue_ptr);
+               }
 
                if (major_collector.is_concurrent && !wait_to_finish) {
                        major_start_concurrent_collection (reason);
@@ -3187,6 +3368,11 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                                overflow_reason = "Excessive pinning";
                        }
                }
+
+               if (unpin_queue_ptr) {
+                       unpin_objects_from_queue (unpin_queue_ptr);
+                       sgen_gray_object_queue_deinit (unpin_queue_ptr);
+               }
        }
 
        TV_GETTIME (gc_end);
@@ -3201,7 +3387,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                infos [1].total_time = gc_end;
 
                if (overflow_generation_to_collect == GENERATION_NURSERY)
-                       collect_nursery ();
+                       collect_nursery (NULL);
                else
                        major_do_collection (overflow_reason);
 
@@ -3210,6 +3396,8 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
 
                /* keep events symmetric */
                mono_profiler_gc_event (MONO_GC_EVENT_END, overflow_generation_to_collect);
+
+               oldest_generation_collected = MAX (oldest_generation_collected, overflow_generation_to_collect);
        }
 
        SGEN_LOG (2, "Heap size: %lu, LOS size: %lu", (unsigned long)mono_gc_get_heap_size (), (unsigned long)los_memory_usage);
@@ -3223,7 +3411,10 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
        }
 
  done:
-       sgen_restart_world (generation_to_collect, infos);
+       g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
+       g_assert (sgen_gray_object_queue_is_empty (&remember_major_objects_gray_queue));
+
+       sgen_restart_world (oldest_generation_collected, infos);
 
        mono_profiler_gc_event (MONO_GC_EVENT_END, generation_to_collect);
 }
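The MONO_GC_REQUESTED probe at the top of this function is the one named in the commit message: it fires once per requested collection, before the world is stopped, with the requested generation, the allocation size that triggered the request, and a flag telling whether the caller waits for the collection to finish (wait_to_finish). For example, an explicit synchronous major collection request reaches this function roughly as follows and would therefore fire MONO_GC_REQUESTED (GENERATION_OLD, 0, 1):

/* Illustrative call site; requested_size would be 0 for an explicit request
 * like this. */
sgen_perform_collection (0, GENERATION_OLD, "user request", TRUE);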
@@ -3306,14 +3497,22 @@ void
 sgen_queue_finalization_entry (MonoObject *obj)
 {
        FinalizeReadyEntry *entry = sgen_alloc_internal (INTERNAL_MEM_FINALIZE_READY_ENTRY);
+       gboolean critical = has_critical_finalizer (obj);
        entry->object = obj;
-       if (has_critical_finalizer (obj)) {
+       if (critical) {
                entry->next = critical_fin_list;
                critical_fin_list = entry;
        } else {
                entry->next = fin_ready_list;
                fin_ready_list = entry;
        }
+
+       if (G_UNLIKELY (MONO_GC_FINALIZE_ENQUEUE_ENABLED ())) {
+               int gen = sgen_ptr_in_nursery (obj) ? GENERATION_NURSERY : GENERATION_OLD;
+               MonoVTable *vt = (MonoVTable*)LOAD_VTABLE (obj);
+               MONO_GC_FINALIZE_ENQUEUE ((mword)obj, sgen_safe_object_get_size (obj),
+                               vt->klass->name_space, vt->klass->name, gen, critical);
+       }
 }
 
 static inline int
@@ -3365,8 +3564,10 @@ null_ephemerons_for_domain (MonoDomain *domain)
 
 /* LOCKING: requires that the GC lock is held */
 static void
-clear_unreachable_ephemerons (CopyOrMarkObjectFunc copy_func, char *start, char *end, GrayQueue *queue)
+clear_unreachable_ephemerons (char *start, char *end, ScanCopyContext ctx)
 {
+       CopyOrMarkObjectFunc copy_func = ctx.copy_func;
+       GrayQueue *queue = ctx.queue;
        int was_in_nursery, was_promoted;
        EphemeronLinkNode *current = ephemeron_list, *prev = NULL;
        MonoArray *array;
@@ -3423,13 +3624,14 @@ clear_unreachable_ephemerons (CopyOrMarkObjectFunc copy_func, char *start, char
                        }
 
                        if (was_promoted) {
+                               gpointer value = cur->value;
                                if (ptr_in_nursery (key)) {/*key was not promoted*/
                                        SGEN_LOG (5, "\tAdded remset to key %p", key);
-                                       sgen_add_to_global_remset (&cur->key);
+                                       sgen_add_to_global_remset (&cur->key, key);
                                }
-                               if (ptr_in_nursery (cur->value)) {/*value was not promoted*/
+                               if (ptr_in_nursery (value)) {/*value was not promoted*/
                                        SGEN_LOG (5, "\tAdded remset to value %p", cur->value);
-                                       sgen_add_to_global_remset (&cur->value);
+                                       sgen_add_to_global_remset (&cur->value, value);
                                }
                        }
                }
@@ -3440,8 +3642,10 @@ clear_unreachable_ephemerons (CopyOrMarkObjectFunc copy_func, char *start, char
 
 /* LOCKING: requires that the GC lock is held */
 static int
-mark_ephemerons_in_range (CopyOrMarkObjectFunc copy_func, char *start, char *end, GrayQueue *queue)
+mark_ephemerons_in_range (char *start, char *end, ScanCopyContext ctx)
 {
+       CopyOrMarkObjectFunc copy_func = ctx.copy_func;
+       GrayQueue *queue = ctx.queue;
        int nothing_marked = 1;
        EphemeronLinkNode *current = ephemeron_list;
        MonoArray *array;
@@ -3724,6 +3928,10 @@ scan_thread_data (void *start_nursery, void *end_nursery, gboolean precise, Gray
                                gc_callbacks.thread_mark_func (info->runtime_data, info->stack_start, info->stack_end, precise);
                                set_user_copy_or_mark_data (NULL);
                        } else if (!precise) {
+                               if (!conservative_stack_mark) {
+                                       fprintf (stderr, "Precise stack mark not supported - disabling.\n");
+                                       conservative_stack_mark = TRUE;
+                               }
                                conservatively_pin_objects_from (info->stack_start, info->stack_end, start_nursery, end_nursery, PIN_TYPE_STACK);
                        }
                }
@@ -4491,6 +4699,7 @@ mono_gc_base_init (void)
        int dummy;
        gboolean debug_print_allowance = FALSE;
        double allowance_ratio = 0, save_target = 0;
+       gboolean have_split_nursery = FALSE;
 
        do {
                result = InterlockedCompareExchange (&gc_initialized, -1, 0);
@@ -4573,11 +4782,12 @@ mono_gc_base_init (void)
        if (!minor_collector_opt) {
                sgen_simple_nursery_init (&sgen_minor_collector);
        } else {
-               if (!strcmp (minor_collector_opt, "simple"))
+               if (!strcmp (minor_collector_opt, "simple")) {
                        sgen_simple_nursery_init (&sgen_minor_collector);
-               else if (!strcmp (minor_collector_opt, "split"))
+               } else if (!strcmp (minor_collector_opt, "split")) {
                        sgen_split_nursery_init (&sgen_minor_collector);
-               else {
+                       have_split_nursery = TRUE;
+               } else {
                        fprintf (stderr, "Unknown minor collector `%s'.\n", minor_collector_opt);
                        exit (1);
                }
@@ -4592,6 +4802,10 @@ mono_gc_base_init (void)
        } else if (!major_collector_opt || !strcmp (major_collector_opt, "marksweep-fixed-par")) {
                sgen_marksweep_fixed_par_init (&major_collector);
        } else if (!major_collector_opt || !strcmp (major_collector_opt, "marksweep-conc")) {
+               if (have_split_nursery) {
+                       fprintf (stderr, "Concurrent Mark&Sweep does not yet support the split nursery.\n");
+                       exit (1);
+               }
                sgen_marksweep_conc_init (&major_collector);
        } else if (!strcmp (major_collector_opt, "copying")) {
                sgen_copying_init (&major_collector);
@@ -4804,9 +5018,6 @@ mono_gc_base_init (void)
        if (minor_collector_opt)
                g_free (minor_collector_opt);
 
-       if (major_collector.is_concurrent)
-               LOCK_INIT (workers_distribute_gray_queue_mutex);
-
        alloc_nursery ();
 
        if ((env = getenv ("MONO_GC_DEBUG"))) {
@@ -4852,6 +5063,8 @@ mono_gc_base_init (void)
                        } else if (!strcmp (opt, "check-at-minor-collections")) {
                                consistency_check_at_minor_collection = TRUE;
                                nursery_clear_policy = CLEAR_AT_GC;
+                       } else if (!strcmp (opt, "check-mark-bits")) {
+                               check_mark_bits_after_major_collection = TRUE;
                        } else if (!strcmp (opt, "xdomain-checks")) {
                                xdomain_checks = TRUE;
                        } else if (!strcmp (opt, "clear-at-gc")) {
@@ -4862,6 +5075,12 @@ mono_gc_base_init (void)
                                do_scan_starts_check = TRUE;
                        } else if (!strcmp (opt, "verify-nursery-at-minor-gc")) {
                                do_verify_nursery = TRUE;
+                       } else if (!strcmp (opt, "check-concurrent")) {
+                               if (!major_collector.is_concurrent) {
+                                       fprintf (stderr, "Error: check-concurrent only works with concurrent major collectors.\n");
+                                       exit (1);
+                               }
+                               do_concurrent_checks = TRUE;
                        } else if (!strcmp (opt, "dump-nursery-at-minor-gc")) {
                                do_dump_nursery_content = TRUE;
                        } else if (!strcmp (opt, "no-managed-allocator")) {
@@ -4892,12 +5111,14 @@ mono_gc_base_init (void)
                                fprintf (stderr, "  collect-before-allocs[=<n>]\n");
                                fprintf (stderr, "  verify-before-allocs[=<n>]\n");
                                fprintf (stderr, "  check-at-minor-collections\n");
+                               fprintf (stderr, "  check-mark-bits\n");
                                fprintf (stderr, "  verify-before-collections\n");
                                fprintf (stderr, "  verify-nursery-at-minor-gc\n");
                                fprintf (stderr, "  dump-nursery-at-minor-gc\n");
                                fprintf (stderr, "  disable-minor\n");
                                fprintf (stderr, "  disable-major\n");
                                fprintf (stderr, "  xdomain-checks\n");
+                               fprintf (stderr, "  check-concurrent\n");
                                fprintf (stderr, "  clear-at-gc\n");
                                fprintf (stderr, "  clear-nursery-at-gc\n");
                                fprintf (stderr, "  check-scan-starts\n");
@@ -4926,7 +5147,7 @@ mono_gc_base_init (void)
        }
 
        if (major_collector.post_param_init)
-               major_collector.post_param_init ();
+               major_collector.post_param_init (&major_collector);
 
        sgen_memgov_init (max_heap, soft_limit, debug_print_allowance, allowance_ratio, save_target);
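The new debug checks hook into the existing MONO_GC_DEBUG option parsing above: MONO_GC_DEBUG=check-mark-bits verifies the major-heap mark bits after every major collection, and MONO_GC_DEBUG=check-concurrent enables the extra nursery/concurrent consistency checks, which, as the error message added above states, require a concurrent major collector (for example MONO_GC_PARAMS=major=marksweep-conc).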