[runtime] Further removed code that doubted IMT
[mono.git] / mono / metadata / sgen-marksweep.c
index fef08ef02ebc1a8df67b01483bdb958004046bf8..bd2d3bbbbcf946e218802a4996f4f923e7d26b9a 100644 (file)
@@ -43,6 +43,7 @@
 #include "metadata/sgen-pointer-queue.h"
 #include "metadata/sgen-pinning.h"
 #include "metadata/sgen-workers.h"
+#include "metadata/sgen-thread-pool.h"
 
 #if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
 #define MS_BLOCK_SIZE  ARCH_MIN_MS_BLOCK_SIZE
@@ -77,7 +78,7 @@
  * SWEPT           The block is fully swept.  It might or might not be in
  *                 a free list.
  *
- * NOT_SWEPT       The block might or might not contain live objects.  If
+ * MARKING         The block might or might not contain live objects.  If
  *                 we're in between an initial collection pause and the
  *                 finishing pause, the block might or might not be in a
  *                 free list.
@@ -109,13 +110,12 @@ struct _MSBlockInfo {
         * recalculating to save the space.
         */
        guint16 obj_size_index;
-       /* FIXME: reduce this */
+       /* FIXME: Reduce this - it only needs a byte. */
        volatile gint32 state;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
        unsigned int has_pinned : 1;    /* means cannot evacuate */
        unsigned int is_to_space : 1;
-       unsigned int swept : 1;
        void ** volatile free_list;
        MSBlockInfo * volatile next_free;
        guint8 *cardtable_mod_union;
@@ -176,44 +176,55 @@ static float evacuation_threshold = 0.666f;
 static float concurrent_evacuation_threshold = 0.666f;
 static gboolean want_evacuation = FALSE;
 
-static gboolean lazy_sweep = TRUE;
+static gboolean lazy_sweep = FALSE;
 
 enum {
        SWEEP_STATE_SWEPT,
        SWEEP_STATE_NEED_SWEEPING,
        SWEEP_STATE_SWEEPING,
+       SWEEP_STATE_SWEEPING_AND_ITERATING,
+       SWEEP_STATE_COMPACTING
 };
 
 static volatile int sweep_state = SWEEP_STATE_SWEPT;
 
 static gboolean concurrent_mark;
+static gboolean concurrent_sweep = TRUE;
 
 #define BLOCK_IS_TAGGED_HAS_REFERENCES(bl)     SGEN_POINTER_IS_TAGGED_1 ((bl))
 #define BLOCK_TAG_HAS_REFERENCES(bl)           SGEN_POINTER_TAG_1 ((bl))
-#define BLOCK_UNTAG_HAS_REFERENCES(bl)         SGEN_POINTER_UNTAG_1 ((bl))
 
-#define BLOCK_TAG(bl)  ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
+#define BLOCK_IS_TAGGED_CHECKING(bl)           SGEN_POINTER_IS_TAGGED_2 ((bl))
+#define BLOCK_TAG_CHECKING(bl)                 SGEN_POINTER_TAG_2 ((bl))
+
+#define BLOCK_UNTAG(bl)                                SGEN_POINTER_UNTAG_12 ((bl))
+
+#define BLOCK_TAG(bl)                          ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
 
 /* all allocated blocks in the system */
 static SgenPointerQueue allocated_blocks;
-static mono_mutex_t allocated_blocks_lock;
-
-#define LOCK_ALLOCATED_BLOCKS  mono_mutex_lock (&allocated_blocks_lock)
-#define UNLOCK_ALLOCATED_BLOCKS        mono_mutex_unlock (&allocated_blocks_lock)
 
 /* non-allocated block free-list */
 static void *empty_blocks = NULL;
 static size_t num_empty_blocks = 0;
 
-#define FOREACH_BLOCK(bl)      { size_t __index; LOCK_ALLOCATED_BLOCKS; for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [__index]);
-#define FOREACH_BLOCK_HAS_REFERENCES(bl,hr)    { size_t __index; LOCK_ALLOCATED_BLOCKS; for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = allocated_blocks.data [__index]; (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); (bl) = BLOCK_UNTAG_HAS_REFERENCES ((bl));
-#define END_FOREACH_BLOCK      } UNLOCK_ALLOCATED_BLOCKS; }
-
-#define FOREACH_BLOCK_NO_LOCK(bl)      { size_t __index; for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [__index]);
-#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr)    { size_t __index; SGEN_ASSERT (0, sgen_is_world_stopped (), "Can't iterate blocks without lock when world is running."); for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = allocated_blocks.data [__index]; (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); (bl) = BLOCK_UNTAG_HAS_REFERENCES ((bl));
+#define FOREACH_BLOCK_NO_LOCK_CONDITION(cond,bl) {                     \
+       size_t __index;                                                 \
+       SGEN_ASSERT (0, (cond) && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); \
+       for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { \
+               (bl) = BLOCK_UNTAG (allocated_blocks.data [__index]);
+#define FOREACH_BLOCK_NO_LOCK(bl)                                      \
+       FOREACH_BLOCK_NO_LOCK_CONDITION(sgen_is_world_stopped (), bl)
+#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr) {                  \
+       size_t __index;                                                 \
+       SGEN_ASSERT (0, sgen_is_world_stopped () && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); \
+       for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { \
+               (bl) = allocated_blocks.data [__index];                 \
+               (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
+               (bl) = BLOCK_UNTAG ((bl));
 #define END_FOREACH_BLOCK_NO_LOCK      } }
 
-static size_t num_major_sections = 0; /* GUARD */
+static volatile size_t num_major_sections = 0;
 /*
  * One free block list for each block object size.  We add and remove blocks from these
  * lists lock-free via CAS.
@@ -269,8 +280,7 @@ add_scanned_object (void *ptr)
 }
 #endif
 
-static void
-sweep_block (MSBlockInfo *block, gboolean during_major_collection);
+static gboolean sweep_block (MSBlockInfo *block);
 
 static int
 ms_find_block_obj_size_index (size_t size)
@@ -397,13 +407,29 @@ ms_free_block (void *block)
        binary_protocol_block_free (block, MS_BLOCK_SIZE);
 }
 
+static gboolean
+sweep_in_progress (void)
+{
+       int state = sweep_state;
+       return state == SWEEP_STATE_SWEEPING ||
+               state == SWEEP_STATE_SWEEPING_AND_ITERATING ||
+               state == SWEEP_STATE_COMPACTING;
+}
+
+static inline gboolean
+block_is_swept_or_marking (MSBlockInfo *block)
+{
+       gint32 state = block->state;
+       return state == BLOCK_STATE_SWEPT || state == BLOCK_STATE_MARKING;
+}
+
 //#define MARKSWEEP_CONSISTENCY_CHECK
 
 #ifdef MARKSWEEP_CONSISTENCY_CHECK
 static void
 check_block_free_list (MSBlockInfo *block, int size, gboolean pinned)
 {
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Can't examine allocated blocks during sweep");
+       SGEN_ASSERT (0, !sweep_in_progress (), "Can't examine allocated blocks during sweep");
        for (; block; block = block->next_free) {
                SGEN_ASSERT (0, block->state != BLOCK_STATE_CHECKING, "Can't have a block we're checking in a free list.");
                g_assert (block->obj_size == size);
@@ -435,7 +461,6 @@ consistency_check (void)
        int i;
 
        /* check all blocks */
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Can't examine allocated blocks during sweep");
        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int num_free = 0;
@@ -456,7 +481,7 @@ consistency_check (void)
                g_assert (num_free == 0);
 
                /* check all mark words are zero */
-               if (!sgen_concurrent_collection_in_progress () && (block->state == BLOCK_STATE_SWEPT || block->state == BLOCK_STATE_MARKING)) {
+               if (!sgen_concurrent_collection_in_progress () && block_is_swept_or_marking (block)) {
                        for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
                                g_assert (block->mark_words [i] == 0);
                }
@@ -482,6 +507,8 @@ add_free_block (MSBlockInfo * volatile *free_blocks, int size_index, MSBlockInfo
        } while (SGEN_CAS_PTR ((gpointer)&free_blocks [size_index], block, old) != old);
 }
 
+static void major_finish_sweep_checking (void);
+
 static gboolean
 ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
 {
@@ -511,8 +538,8 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
         * want further evacuation.
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD);
-       info->swept = 1;
        info->state = (info->is_to_space || sgen_concurrent_collection_in_progress ()) ? BLOCK_STATE_MARKING : BLOCK_STATE_SWEPT;
+       SGEN_ASSERT (6, !sweep_in_progress () || info->state == BLOCK_STATE_SWEPT, "How do we add a new block to be swept while sweeping?");
        info->cardtable_mod_union = NULL;
 
        update_heap_boundaries_for_block (info);
@@ -533,11 +560,17 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
 
        add_free_block (free_blocks, size_index, info);
 
-       LOCK_ALLOCATED_BLOCKS;
+       /*
+        * This is the only place where the `allocated_blocks` array can potentially grow.
+        * We need to make sure concurrent sweep isn't running when that happens, so in that
+        * specific case we just wait for sweep to finish.
+        */
+       if (sgen_pointer_queue_will_grow (&allocated_blocks))
+               major_finish_sweep_checking ();
+
        sgen_pointer_queue_add (&allocated_blocks, BLOCK_TAG (info));
-       UNLOCK_ALLOCATED_BLOCKS;
 
-       ++num_major_sections;
+       SGEN_ATOMIC_ADD_P (num_major_sections, 1);
        return TRUE;
 }
 
@@ -546,7 +579,6 @@ obj_is_from_pinned_alloc (char *ptr)
 {
        MSBlockInfo *block;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Can't examine allocated blocks during sweep");
        FOREACH_BLOCK_NO_LOCK (block) {
                if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE)
                        return block->pinned;
@@ -554,18 +586,9 @@ obj_is_from_pinned_alloc (char *ptr)
        return FALSE;
 }
 
-static void
-assert_block_state_is_consistent (MSBlockInfo *info)
-{
-       int swept = info->state == BLOCK_STATE_SWEPT || info->state == BLOCK_STATE_MARKING;
-       SGEN_ASSERT (0, swept == info->swept, "Block state inconsistent.");
-}
-
 static void
 ensure_can_access_block_free_list (MSBlockInfo *block)
 {
-       assert_block_state_is_consistent (block);
-
  retry:
        for (;;) {
                switch (block->state) {
@@ -576,8 +599,8 @@ ensure_can_access_block_free_list (MSBlockInfo *block)
                        SGEN_ASSERT (0, FALSE, "How did we get a block that's being checked from a free list?");
                        break;
                case BLOCK_STATE_NEED_SWEEPING:
-                       stat_major_blocks_lazy_swept ++;
-                       sweep_block (block, FALSE);
+                       if (sweep_block (block))
+                               ++stat_major_blocks_lazy_swept;
                        break;
                case BLOCK_STATE_SWEEPING:
                        /* FIXME: do this more elegantly */
@@ -603,7 +626,7 @@ unlink_slot_from_free_list_uncontested (MSBlockInfo * volatile *free_blocks, int
        ensure_can_access_block_free_list (block);
 
        obj = block->free_list;
-       SGEN_ASSERT (0, obj, "block %p in free list had no available object to alloc from", block);
+       SGEN_ASSERT (6, obj, "block %p in free list had no available object to alloc from", block);
 
        next_free_slot = *(void**)obj;
        if (next_free_slot) {
@@ -660,7 +683,7 @@ free_object (char *obj, size_t size, gboolean pinned)
        int word, bit;
        gboolean in_free_list;
 
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");
+       SGEN_ASSERT (9, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");
 
        ensure_can_access_block_free_list (block);
        SGEN_ASSERT (9, (pinned && block->pinned) || (!pinned && !block->pinned), "free-object pinning mixup object %p pinned %d block %p pinned %d", obj, pinned, block, block->pinned);
@@ -758,7 +781,6 @@ major_ptr_is_in_non_pinned_space (char *ptr, char **start)
 {
        MSBlockInfo *block;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Can't examine allocated blocks during sweep");
        FOREACH_BLOCK_NO_LOCK (block) {
                if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) {
                        int count = MS_BLOCK_FREE / block->obj_size;
@@ -777,21 +799,65 @@ major_ptr_is_in_non_pinned_space (char *ptr, char **start)
        return FALSE;
 }
 
+static gboolean
+try_set_sweep_state (int new, int expected)
+{
+       int old = SGEN_CAS (&sweep_state, new, expected);
+       return old == expected;
+}
+
 static void
-major_finish_sweeping (void)
+set_sweep_state (int new, int expected)
 {
-       SGEN_TV_DECLARE (tv_begin);
-       SGEN_TV_DECLARE (tv_end);
+       gboolean success = try_set_sweep_state (new, expected);
+       SGEN_ASSERT (0, success, "Could not set sweep state.");
+}
+
+static gboolean ensure_block_is_checked_for_sweeping (int block_index, gboolean wait, gboolean *have_checked);
+
+static SgenThreadPoolJob * volatile sweep_job;
 
-       if (sweep_state != SWEEP_STATE_SWEEPING)
+static void
+major_finish_sweep_checking (void)
+{
+       int block_index;
+       SgenThreadPoolJob *job;
+
+ retry:
+       switch (sweep_state) {
+       case SWEEP_STATE_SWEPT:
+       case SWEEP_STATE_NEED_SWEEPING:
                return;
+       case SWEEP_STATE_SWEEPING:
+               if (try_set_sweep_state (SWEEP_STATE_SWEEPING_AND_ITERATING, SWEEP_STATE_SWEEPING))
+                       break;
+               goto retry;
+       case SWEEP_STATE_SWEEPING_AND_ITERATING:
+               SGEN_ASSERT (0, FALSE, "Is there another minor collection running?");
+               goto retry;
+       case SWEEP_STATE_COMPACTING:
+               goto wait;
+       default:
+               SGEN_ASSERT (0, FALSE, "Invalid sweep state.");
+               break;
+       }
 
-       SGEN_TV_GETTIME (tv_begin);
-       while (sweep_state == SWEEP_STATE_SWEEPING)
-               g_usleep (100);
-       SGEN_TV_GETTIME (tv_end);
+       /*
+        * We're running with the world stopped and the only other thread doing work is the
+        * sweep thread, which doesn't add blocks to the array, so we can safely access
+        * `next_slot`.
+        */
+       for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index)
+               ensure_block_is_checked_for_sweeping (block_index, FALSE, NULL);
+
+       set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_SWEEPING_AND_ITERATING);
 
-       g_print ("**** waited for sweep: %d ms\n", SGEN_TV_ELAPSED_MS (tv_begin, tv_end));
+ wait:
+       job = sweep_job;
+       if (job)
+               sgen_thread_pool_job_wait (job);
+       SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?");
+       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?");
 }
 
 static void
@@ -802,28 +868,31 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
        gboolean pinned = flags & ITERATE_OBJECTS_PINNED;
        MSBlockInfo *block;
 
-       major_finish_sweeping ();
-       FOREACH_BLOCK (block) {
+       major_finish_sweep_checking ();
+       FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
 
-               assert_block_state_is_consistent (block);
-
                if (block->pinned && !pinned)
                        continue;
                if (!block->pinned && !non_pinned)
                        continue;
                if (sweep && lazy_sweep) {
-                       /* FIXME: We can't just call `sweep_block` willy-nilly. */
-                       sweep_block (block, FALSE);
-                       SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
+                       sweep_block (block);
+                       SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
                }
 
                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
-                       /* FIXME: This condition is probably incorrect. */
-                       if (block->state != BLOCK_STATE_SWEPT && block->state != BLOCK_STATE_MARKING) {
+                       /*
+                        * We've finished sweep checking, but if we're sweeping lazily and
+                        * the flags don't require us to sweep, the block might still need
+                        * sweeping.  In that case, we need to consult the mark bits to tell
+                        * us whether an object slot is live.
+                        */
+                       if (!block_is_swept_or_marking (block)) {
                                int word, bit;
+                               SGEN_ASSERT (6, !sweep && block->state == BLOCK_STATE_NEED_SWEEPING, "Has sweeping not finished?");
                                MS_CALC_MARK_BIT (word, bit, obj);
                                if (!MS_MARK_BIT (block, word, bit))
                                        continue;
@@ -831,7 +900,7 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
                        if (MS_OBJ_ALLOCED (obj, block))
                                callback ((char*)obj, block->obj_size, data);
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static gboolean
@@ -839,7 +908,6 @@ major_is_valid_object (char *object)
 {
        MSBlockInfo *block;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Can't iterate blocks during sweep");
        FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;
@@ -922,8 +990,7 @@ major_dump_heap (FILE *heap_dump_file)
        for (i = 0; i < num_block_obj_sizes; ++i)
                slots_available [i] = slots_used [i] = 0;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Can't iterate blocks during sweep");
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int index = ms_find_block_obj_size_index (block->obj_size);
                int count = MS_BLOCK_FREE / block->obj_size;
 
@@ -932,7 +999,7 @@ major_dump_heap (FILE *heap_dump_file)
                        if (MS_OBJ_ALLOCED (MS_BLOCK_OBJ (block, i), block))
                                ++slots_used [index];
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        fprintf (heap_dump_file, "<occupancies>\n");
        for (i = 0; i < num_block_obj_sizes; ++i) {
@@ -941,7 +1008,7 @@ major_dump_heap (FILE *heap_dump_file)
        }
        fprintf (heap_dump_file, "</occupancies>\n");
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
                int start = -1;
@@ -961,7 +1028,7 @@ major_dump_heap (FILE *heap_dump_file)
                }
 
                fprintf (heap_dump_file, "</section>\n");
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 #define LOAD_VTABLE    SGEN_LOAD_VTABLE
@@ -1009,7 +1076,7 @@ static void
 major_copy_or_mark_object_with_evacuation_concurrent (void **ptr, void *obj, SgenGrayQueue *queue)
 {
        SGEN_ASSERT (9, sgen_concurrent_collection_in_progress (), "Why are we scanning concurrently when there's no concurrent collection on?");
-       SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_is_worker_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
+       SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
 
        g_assert (!SGEN_OBJECT_IS_FORWARDED (obj));
 
@@ -1188,7 +1255,7 @@ sweep_block_for_size (MSBlockInfo *block, int count, int obj_size)
        }
 }
 
-static gboolean
+static inline gboolean
 try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
 {
        gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state);
@@ -1198,40 +1265,35 @@ try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state
        return success;
 }
 
-/*
- * FIXME: This only CASes to catch errors.  It's not needed for correctness.
- */
-static void
+static inline void
 set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
 {
-       gboolean success = try_set_block_state (block, new_state, expected_state);
-       SGEN_ASSERT (0, success, "Couldn't set block state");
-       SGEN_ASSERT (0, block->state == new_state, "Block state incorrect after set");
+       SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
+       block->state = new_state;
 }
 
 /*
- * sweep_block:
+ * If `block` needs sweeping, sweep it and return TRUE.  Otherwise return FALSE.
  *
- *   Traverse BLOCK, freeing and zeroing unused objects.
+ * Sweeping means iterating through the block's slots and building the free-list from the
+ * unmarked ones.  They will also be zeroed.  The mark bits will be reset.
  */
-static void
-sweep_block (MSBlockInfo *block, gboolean during_major_collection)
+static gboolean
+sweep_block (MSBlockInfo *block)
 {
        int count;
        void *reversed = NULL;
 
  retry:
-       assert_block_state_is_consistent (block);
-
        switch (block->state) {
        case BLOCK_STATE_SWEPT:
-               return;
+               return FALSE;
        case BLOCK_STATE_MARKING:
        case BLOCK_STATE_CHECKING:
                SGEN_ASSERT (0, FALSE, "How did we get to sweep a block that's being marked or being checked?");
                goto retry;
        case BLOCK_STATE_SWEEPING:
-               /* FIXME: Pick another block or whatever */
+               /* FIXME: Do this more elegantly */
                g_usleep (100);
                goto retry;
        case BLOCK_STATE_NEED_SWEEPING:
@@ -1242,7 +1304,7 @@ sweep_block (MSBlockInfo *block, gboolean during_major_collection)
                SGEN_ASSERT (0, FALSE, "Illegal block state");
        }
 
-       SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");
+       SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");
 
        count = MS_BLOCK_FREE / block->obj_size;
 
@@ -1272,11 +1334,11 @@ sweep_block (MSBlockInfo *block, gboolean during_major_collection)
        }
        block->free_list = reversed;
 
-       block->swept = 1;
-
        mono_memory_write_barrier ();
 
        set_block_state (block, BLOCK_STATE_SWEPT, BLOCK_STATE_SWEEPING);
+
+       return TRUE;
 }
 
 static inline int
@@ -1303,8 +1365,8 @@ static size_t *sweep_slots_available;
 static size_t *sweep_slots_used;
 static size_t *sweep_num_blocks;
 
-static size_t num_major_sections_before_sweep;
-static size_t num_major_sections_freed_in_sweep; /* GUARD */
+static volatile size_t num_major_sections_before_sweep;
+static volatile size_t num_major_sections_freed_in_sweep;
 
 static void
 sweep_start (void)
@@ -1326,67 +1388,71 @@ sweep_start (void)
 static void sweep_finish (void);
 
 /*
- * LOCKING: The allocated blocks lock must be held when entering this function.  `block`
- * must have been loaded from the array with the lock held.  This function will unlock the
- * lock.
+ * If `wait` is TRUE and the block is currently being checked, this function will wait until
+ * the checking has finished.
  *
- * Returns whether the block is still there.
+ * Returns whether the block is still there.  If `wait` is FALSE, the return value is not
+ * reliable and must not be used.
  */
 static gboolean
-ensure_block_is_checked_for_sweeping (MSBlockInfo *block, int block_index, gboolean *have_checked)
+ensure_block_is_checked_for_sweeping (int block_index, gboolean wait, gboolean *have_checked)
 {
        int count;
        gboolean have_live = FALSE;
        gboolean have_free = FALSE;
        int nused = 0;
-       int block_state = block->state;
+       int block_state;
        int i;
+       void *tagged_block;
+       MSBlockInfo *block;
+
+       SGEN_ASSERT (6, sweep_in_progress (), "Why do we call this function if there's no sweep in progress?");
 
        if (have_checked)
                *have_checked = FALSE;
 
-       if (sweep_state != SWEEP_STATE_SWEEPING) {
-               SGEN_ASSERT (0, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
+ retry:
+       tagged_block = *(void * volatile *)&allocated_blocks.data [block_index];
+       if (!tagged_block)
+               return FALSE;
+
+       if (BLOCK_IS_TAGGED_CHECKING (tagged_block)) {
+               if (!wait)
+                       return FALSE;
+               /* FIXME: do this more elegantly */
+               g_usleep (100);
+               goto retry;
+       }
+
+       if (SGEN_CAS_PTR (&allocated_blocks.data [block_index], BLOCK_TAG_CHECKING (tagged_block), tagged_block) != tagged_block)
+               goto retry;
+
+       block = BLOCK_UNTAG (tagged_block);
+       block_state = block->state;
+
+       if (!sweep_in_progress ()) {
+               SGEN_ASSERT (6, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
                if (!lazy_sweep)
-                       SGEN_ASSERT (0, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
+                       SGEN_ASSERT (6, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
        }
 
- retry:
        switch (block_state) {
        case BLOCK_STATE_SWEPT:
        case BLOCK_STATE_NEED_SWEEPING:
        case BLOCK_STATE_SWEEPING:
-               UNLOCK_ALLOCATED_BLOCKS;
-               return TRUE;
+               goto done;
        case BLOCK_STATE_MARKING:
-               if (sweep_state == SWEEP_STATE_SWEEPING)
-                       break;
-               UNLOCK_ALLOCATED_BLOCKS;
-               return TRUE;
+               break;
        case BLOCK_STATE_CHECKING:
-               /*
-                * FIXME: do this more elegantly.
-                *
-                * Also, when we're called from the sweep thread, we don't actually have to
-                * wait for it to finish, because the sweep thread doesn't use the block.
-                * However, the sweep thread needs to know when all the blocks have been
-                * checked (so it can set the global sweep state to SWEPT), so we'd have to
-                * do some kind of accounting if we don't wait.
-                */
-               g_usleep (100);
-               block_state = block->state;
-               goto retry;
+               SGEN_ASSERT (0, FALSE, "We set the CHECKING bit - how can the stage be CHECKING?");
+               goto done;
        default:
                SGEN_ASSERT (0, FALSE, "Illegal block state");
                break;
        }
 
-       block->swept = 0;
-       SGEN_ASSERT (0, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
+       SGEN_ASSERT (6, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
        set_block_state (block, BLOCK_STATE_CHECKING, BLOCK_STATE_MARKING);
-       UNLOCK_ALLOCATED_BLOCKS;
-
-       assert_block_state_is_consistent (block);
 
        if (have_checked)
                *have_checked = TRUE;
@@ -1423,7 +1489,7 @@ ensure_block_is_checked_for_sweeping (MSBlockInfo *block, int block_index, gbool
                 * statistics.
                 */
                if (!lazy_sweep)
-                       sweep_block (block, TRUE);
+                       sweep_block (block);
 
                if (!has_pinned) {
                        ++sweep_num_blocks [obj_size_index];
@@ -1437,54 +1503,44 @@ ensure_block_is_checked_for_sweeping (MSBlockInfo *block, int block_index, gbool
                 */
                if (have_free) {
                        MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (block->pinned, block->has_references);
-                       int index = MS_BLOCK_OBJ_SIZE_INDEX (block->obj_size);
 
                        if (!lazy_sweep)
-                               SGEN_ASSERT (0, block->free_list, "How do we not have a free list when there are free slots?");
+                               SGEN_ASSERT (6, block->free_list, "How do we not have a free list when there are free slots?");
 
-                       add_free_block (free_blocks, index, block);
+                       add_free_block (free_blocks, obj_size_index, block);
                }
 
                /* FIXME: Do we need the heap boundaries while we do nursery collections? */
                update_heap_boundaries_for_block (block);
-
-               return TRUE;
        } else {
                /*
                 * Blocks without live objects are removed from the
                 * block list and freed.
                 */
-               LOCK_ALLOCATED_BLOCKS;
-               SGEN_ASSERT (0, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
-               SGEN_ASSERT (0, BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]) == block, "How did the block move?");
-               allocated_blocks.data [block_index] = NULL;
-               UNLOCK_ALLOCATED_BLOCKS;
+               SGEN_ASSERT (6, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
+               SGEN_ASSERT (6, allocated_blocks.data [block_index] == BLOCK_TAG_CHECKING (tagged_block), "How did the block move?");
 
                binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
                ms_free_block (block);
 
-               --num_major_sections;
-               ++num_major_sections_freed_in_sweep;
+               SGEN_ATOMIC_ADD_P (num_major_sections, -1);
 
-               return FALSE;
+               tagged_block = NULL;
        }
+
+ done:
+       allocated_blocks.data [block_index] = tagged_block;
+       return !!tagged_block;
 }
 
-static mono_native_thread_return_t
-sweep_loop_thread_func (void *dummy)
+static void
+sweep_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
 {
        int block_index;
-       int num_blocks;
-       int small_id = mono_thread_info_register_small_id ();
+       int num_blocks = num_major_sections_before_sweep;
 
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEEPING, "Sweep thread called with wrong state");
-
-       num_major_sections_before_sweep = num_major_sections;
-       num_major_sections_freed_in_sweep = 0;
-
-       LOCK_ALLOCATED_BLOCKS;
-       num_blocks = allocated_blocks.next_slot;
-       UNLOCK_ALLOCATED_BLOCKS;
+       SGEN_ASSERT (0, sweep_in_progress (), "Sweep thread called with wrong state");
+       SGEN_ASSERT (0, num_blocks <= allocated_blocks.next_slot, "How did we lose blocks?");
 
        /*
         * We traverse the block array from high to low.  Nursery collections will have to
@@ -1492,40 +1548,37 @@ sweep_loop_thread_func (void *dummy)
         * low to high, to avoid constantly colliding on the same blocks.
         */
        for (block_index = num_blocks - 1; block_index >= 0; --block_index) {
-               MSBlockInfo *block;
                gboolean have_checked;
 
-               LOCK_ALLOCATED_BLOCKS;
-               block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]);
-
                /*
                 * The block might have been freed by another thread doing some checking
                 * work.
                 */
-               if (!block) {
-                       UNLOCK_ALLOCATED_BLOCKS;
-                       continue;
-               }
+               if (!ensure_block_is_checked_for_sweeping (block_index, TRUE, &have_checked))
+                       ++num_major_sections_freed_in_sweep;
+       }
 
-               assert_block_state_is_consistent (block);
+       while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING)) {
+               /*
+                * The main GC thread is currently iterating over the block array to help us
+                * finish the sweep.  We have already finished, but we don't want to mess up
+                * that iteration, so we just wait for it.
+                */
+               g_usleep (100);
+       }
 
-               if (block->state == BLOCK_STATE_SWEPT) {
-                       UNLOCK_ALLOCATED_BLOCKS;
-                       continue;
+       if (SGEN_MAX_ASSERT_LEVEL >= 6) {
+               for (block_index = num_blocks; block_index < allocated_blocks.next_slot; ++block_index) {
+                       MSBlockInfo *block = BLOCK_UNTAG (allocated_blocks.data [block_index]);
+                       SGEN_ASSERT (6, block && block->state == BLOCK_STATE_SWEPT, "How did a new block to be swept get added while swept?");
                }
-
-               ensure_block_is_checked_for_sweeping (block, block_index, &have_checked);
        }
 
-       LOCK_ALLOCATED_BLOCKS;
        sgen_pointer_queue_remove_nulls (&allocated_blocks);
-       UNLOCK_ALLOCATED_BLOCKS;
 
        sweep_finish ();
 
-       mono_thread_small_id_free (small_id);
-
-       return NULL;
+       sweep_job = NULL;
 }
 
 static void
@@ -1556,27 +1609,27 @@ sweep_finish (void)
 
        want_evacuation = (float)total_evacuate_saved / (float)total_evacuate_heap > (1 - concurrent_evacuation_threshold);
 
-       sweep_state = SWEEP_STATE_SWEPT;
+       set_sweep_state (SWEEP_STATE_SWEPT, SWEEP_STATE_COMPACTING);
 }
 
-static MonoNativeThreadId sweep_loop_thread;
-
 static void
 major_sweep (void)
 {
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_NEED_SWEEPING, "Why are we sweeping if sweeping is not needed?");
-       sweep_state = SWEEP_STATE_SWEEPING;
+       set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_NEED_SWEEPING); /* NEED_SWEEPING -> SWEEPING: new state first, expected old state second */
 
        sweep_start ();
 
-       if (TRUE /*concurrent_mark*/) {
-               /*
-                * FIXME: We can't create a thread while the world is stopped because it
-                * might deadlock.  `finalizer-wait.exe` exposes this.
-                */
-               mono_native_thread_create (&sweep_loop_thread, sweep_loop_thread_func, NULL);
+       SGEN_ASSERT (0, num_major_sections == allocated_blocks.next_slot, "We don't know how many blocks we have?");
+       /* Record the block count at the start of this sweep and reset the freed-block counter. */
+       num_major_sections_before_sweep = num_major_sections;
+       num_major_sections_freed_in_sweep = 0;
+       /* No earlier sweep job may be pending; then enqueue a thread-pool job or sweep right here. */
+       SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?");
+       if (concurrent_sweep) {
+               sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob));
+               sgen_thread_pool_job_enqueue (sweep_job);
        } else {
-               sweep_loop_thread_func (NULL);
+               sweep_job_func (NULL, NULL); /* synchronous sweep on the calling thread; both arguments are NULL */
        }
 }
 
@@ -1683,9 +1736,6 @@ major_start_nursery_collection (void)
 #endif
 
        old_num_major_sections = num_major_sections;
-
-       if (sweep_state == SWEEP_STATE_SWEEPING)
-               g_print ("sweeping during nursery collection\n");
 }
 
 static void
@@ -1702,7 +1752,7 @@ major_start_major_collection (void)
        MSBlockInfo *block;
        int i;
 
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Major collection on unswept heap");
+       major_finish_sweep_checking ();
 
        /*
         * Clear the free lists for block sizes where we do evacuation.  For those block
@@ -1716,29 +1766,21 @@ major_start_major_collection (void)
                free_block_lists [MS_BLOCK_FLAG_REFS][i] = NULL;
        }
 
-       // Sweep all unswept blocks
-       if (lazy_sweep) {
+       if (lazy_sweep)
                MONO_GC_SWEEP_BEGIN (GENERATION_OLD, TRUE);
 
-               FOREACH_BLOCK (block) {
-                       sweep_block (block, TRUE);
-               } END_FOREACH_BLOCK;
-
-               MONO_GC_SWEEP_END (GENERATION_OLD, TRUE);
-       }
-
-       /* FIXME: Just do one iteration over the blocks in this function. */
-       FOREACH_BLOCK (block) {
+       /* Sweep all unswept blocks and set them to MARKING */
+       FOREACH_BLOCK_NO_LOCK (block) {
+               if (lazy_sweep)
+                       sweep_block (block);
                SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "All blocks must be swept when we're pinning.");
-               /*
-                * FIXME: We don't need CAS here because there's still only one thread doing
-                * stuff.
-                */
                set_block_state (block, BLOCK_STATE_MARKING, BLOCK_STATE_SWEPT);
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Cannot start major collection without having finished sweeping");
-       sweep_state = SWEEP_STATE_NEED_SWEEPING;
+       if (lazy_sweep)
+               MONO_GC_SWEEP_END (GENERATION_OLD, TRUE);
+
+       set_sweep_state (SWEEP_STATE_NEED_SWEEPING, SWEEP_STATE_SWEPT);
 }
 
 static void
@@ -1772,9 +1814,10 @@ compare_pointers (const void *va, const void *vb) {
  * This is called with sweep completed and the world stopped.
  */
 static void
-major_free_swept_blocks (void)
+major_free_swept_blocks (size_t allowance)
 {
-       size_t section_reserve = sgen_get_minor_collection_allowance () / MS_BLOCK_SIZE;
+       /* FIXME: This is probably too much.  It's assuming all objects are small. */
+       size_t section_reserve = allowance / MS_BLOCK_SIZE;
 
        SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
 
@@ -1827,7 +1870,7 @@ major_free_swept_blocks (void)
                        for (i = 0; i < arr_length; ++i) {
                                int d = dest;
                                void *block = empty_block_arr [i];
-                               SGEN_ASSERT (0, block, "we're not shifting correctly");
+                               SGEN_ASSERT (6, block, "we're not shifting correctly");
                                if (i != dest) {
                                        empty_block_arr [dest] = block;
                                        /*
@@ -1843,7 +1886,7 @@ major_free_swept_blocks (void)
                                        continue;
                                }
 
-                               SGEN_ASSERT (0, first >= 0 && d > first, "algorithm is wrong");
+                               SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong");
 
                                if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) {
                                        first = d;
@@ -1876,9 +1919,9 @@ major_free_swept_blocks (void)
                                }
                        }
 
-                       SGEN_ASSERT (0, dest <= i && dest <= arr_length, "array length is off");
+                       SGEN_ASSERT (6, dest <= i && dest <= arr_length, "array length is off");
                        arr_length = dest;
-                       SGEN_ASSERT (0, arr_length == num_empty_blocks, "array length is off");
+                       SGEN_ASSERT (6, arr_length == num_empty_blocks, "array length is off");
 
                        num_blocks >>= 1;
                }
@@ -1887,7 +1930,7 @@ major_free_swept_blocks (void)
                rebuild_next = (void**)&empty_blocks;
                for (i = 0; i < arr_length; ++i) {
                        void *block = empty_block_arr [i];
-                       SGEN_ASSERT (0, block, "we're missing blocks");
+                       SGEN_ASSERT (6, block, "we're missing blocks");
                        *rebuild_next = block;
                        rebuild_next = (void**)block;
                }
@@ -1925,20 +1968,18 @@ major_free_swept_blocks (void)
        }
 }
 
-/* FIXME: Unify `major_find_pin_queue_start_ends` and `major_pin_objects`. */
 static void
 major_pin_objects (SgenGrayQueue *queue)
 {
        MSBlockInfo *block;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Cannot iterate blocks during sweep");
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) { /* per block: find its pin queue range, then mark the pinned objects in that range */
                size_t first_entry, last_entry;
-               SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT || block->state == BLOCK_STATE_MARKING, "All blocks must be swept when we're pinning.");
+               SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning."); /* presumably true when state is SWEPT or MARKING -- TODO confirm against the helper */
                sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
                                &first_entry, &last_entry);
                mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static void
@@ -1958,18 +1999,24 @@ major_get_used_size (void)
        gint64 size = 0;
        MSBlockInfo *block;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Cannot iterate blocks during sweep");
-       FOREACH_BLOCK (block) {
+       /*
+        * We're holding the GC lock, but the sweep thread might be running.  Make sure it's
+        * finished, then we can iterate over the block array.
+        */
+       major_finish_sweep_checking ();
+
+       FOREACH_BLOCK_NO_LOCK_CONDITION (TRUE, block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                void **iter;
                size += count * block->obj_size;
                for (iter = block->free_list; iter; iter = (void**)*iter)
                        size -= block->obj_size;
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        return size;
 }
 
+/* FIXME: return number of bytes, not of sections */
 static size_t
 get_num_major_sections (void)
 {
@@ -1977,14 +2024,15 @@ get_num_major_sections (void)
 }
 
 /*
- * Returns the number of major sections that were present when the last sweep was initiated,
- * and were not freed during the sweep.  They are the basis for calculating the allowance.
+ * Returns the size in bytes, counted in whole blocks of MS_BLOCK_SIZE rather than in
+ * live object bytes, of the blocks that were present when the last sweep was initiated
+ * and were not freed during it.  It is the basis for calculating the allowance.
  */
 static size_t
-get_num_major_unswept_old_sections (void)
+get_bytes_survived_last_sweep (void)
 {
        SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep");
-       return num_major_sections_before_sweep - num_major_sections_freed_in_sweep;
+       return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * MS_BLOCK_SIZE; /* surviving block count scaled to bytes */
 }
 
 static gboolean
@@ -2005,6 +2053,12 @@ major_handle_gc_param (const char *opt)
        } else if (!strcmp (opt, "no-lazy-sweep")) {
                lazy_sweep = FALSE;
                return TRUE;
+       } else if (!strcmp (opt, "concurrent-sweep")) {
+               concurrent_sweep = TRUE;
+               return TRUE;
+       } else if (!strcmp (opt, "no-concurrent-sweep")) {
+               concurrent_sweep = FALSE;
+               return TRUE;
        }
 
        return FALSE;
@@ -2017,6 +2071,7 @@ major_print_gc_param_usage (void)
                        ""
                        "  evacuation-threshold=P (where P is a percentage, an integer in 0-100)\n"
                        "  (no-)lazy-sweep\n"
+                       "  (no-)concurrent-sweep\n"
                        );
 }
 
@@ -2024,22 +2079,16 @@ major_print_gc_param_usage (void)
  * This callback is used to clear cards, move cards to the shadow table and do counting.
  */
 static void
-major_iterate_live_block_ranges (sgen_cardtable_block_callback callback, gboolean requires_sweep)
+major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
 {
        MSBlockInfo *block;
        gboolean has_references;
 
-       if (requires_sweep)
-               major_finish_sweeping ();
-
-       /*
-        * FIXME: Don't take the lock for the whole allocated blocks array because we're
-        * stopping the sweep thread.
-        */
-       FOREACH_BLOCK_HAS_REFERENCES (block, has_references) {
+       major_finish_sweep_checking (); /* a sweep may still be running; this must come before the block array is walked */
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) { /* callback gets (block start, MS_BLOCK_SIZE), only for blocks with has_references set */
                if (has_references)
                        callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 #ifdef HEAVY_STATISTICS
@@ -2159,9 +2208,8 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanObjectFun
                start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
                end = start + CARD_SIZE_IN_BYTES;
 
-               assert_block_state_is_consistent (block);
-               if (block->state != BLOCK_STATE_SWEPT && block->state != BLOCK_STATE_MARKING)
-                       sweep_block (block, FALSE);
+               if (!block_is_swept_or_marking (block))
+                       sweep_block (block);
 
                HEAVY_STAT (++marked_cards);
 
@@ -2181,6 +2229,8 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanObjectFun
 
                obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, first_object_index);
 
+               binary_protocol_card_scan (first_obj, end - first_obj);
+
                while (obj < end) {
                        if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
                                goto next_object;
@@ -2207,7 +2257,6 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanObjectFun
                }
 
                HEAVY_STAT (if (*card_data) ++remarked_cards);
-               binary_protocol_card_scan (first_obj, obj - first_obj);
 
                if (small_objects)
                        ++card_data;
@@ -2220,26 +2269,18 @@ static void
 major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
 {
        ScanObjectFunc scan_func = sgen_get_current_object_ops ()->scan_object;
-       int block_index;
-       gboolean do_sweep_checking = sweep_state == SWEEP_STATE_SWEEPING;
+       MSBlockInfo *block;
+       gboolean has_references;
 
        if (!concurrent_mark)
                g_assert (!mod_union);
 
-       /*
-        * We're running with the world stopped and the only other thread doing work is the
-        * sweep thread, which doesn't add blocks to the array, so we can safely access
-        * `next_slot` without locking.
-        */
-       for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index) {
-               //gboolean has_references;
-               void *tagged_block;
-               MSBlockInfo *block;
-
+       major_finish_sweep_checking ();
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
 #ifdef PREFETCH_CARDS
-               int prefetch_index = block_index + 6;
+               int prefetch_index = __index + 6;
                if (prefetch_index < allocated_blocks.next_slot) {
-                       MSBlockInfo *prefetch_block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [prefetch_index]);
+                       MSBlockInfo *prefetch_block = BLOCK_UNTAG (allocated_blocks.data [prefetch_index]);
                        guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
                        PREFETCH_READ (prefetch_block);
                        PREFETCH_WRITE (prefetch_cards);
@@ -2247,29 +2288,11 @@ major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
                 }
 #endif
 
-               if (do_sweep_checking) {
-                       LOCK_ALLOCATED_BLOCKS;
-                       tagged_block = allocated_blocks.data [block_index];
-                       block = BLOCK_UNTAG_HAS_REFERENCES (tagged_block);
-
-                       if (!block || !BLOCK_IS_TAGGED_HAS_REFERENCES (tagged_block)) {
-                               UNLOCK_ALLOCATED_BLOCKS;
-                               continue;
-                       }
-
-                       if (!ensure_block_is_checked_for_sweeping (block, block_index, NULL))
-                               continue;
-               } else {
-                       tagged_block = allocated_blocks.data [block_index];
-                       block = BLOCK_UNTAG_HAS_REFERENCES (tagged_block);
-                       SGEN_ASSERT (0, block, "Why are there holes in the block array when we're not sweeping?");
-
-                       if (!BLOCK_IS_TAGGED_HAS_REFERENCES (tagged_block))
-                               continue;
-               }
+               if (!has_references)
+                       continue;
 
                scan_card_table_for_block (block, mod_union, scan_func, queue);
-       }
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static void
@@ -2280,12 +2303,13 @@ major_count_cards (long long *num_total_cards, long long *num_marked_cards)
        long long total_cards = 0;
        long long marked_cards = 0;
 
-       if (sweep_state == SWEEP_STATE_SWEEPING) {
+       if (sweep_in_progress ()) {
                *num_total_cards = -1;
                *num_marked_cards = -1;
+               return;
        }
 
-       FOREACH_BLOCK_HAS_REFERENCES (block, has_references) {
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
                guint8 *cards = sgen_card_table_get_card_scan_address ((mword) MS_BLOCK_FOR_BLOCK_INFO (block));
                int i;
 
@@ -2297,7 +2321,7 @@ major_count_cards (long long *num_total_cards, long long *num_marked_cards)
                        if (cards [i])
                                ++marked_cards;
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        *num_total_cards = total_cards;
        *num_marked_cards = marked_cards;
@@ -2308,15 +2332,14 @@ update_cardtable_mod_union (void)
 {
        MSBlockInfo *block;
 
-       SGEN_ASSERT (0, sweep_state != SWEEP_STATE_SWEEPING, "Cannot iterate blocks during sweep");
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                size_t num_cards;
 
                block->cardtable_mod_union = sgen_card_table_update_mod_union (block->cardtable_mod_union,
                                MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards);
 
-               SGEN_ASSERT (0, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
-       } END_FOREACH_BLOCK;
+               SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static guint8*
@@ -2333,6 +2356,7 @@ static void
 post_param_init (SgenMajorCollector *collector)
 {
        collector->sweeps_lazily = lazy_sweep;
+       collector->needs_thread_pool = concurrent_mark || concurrent_sweep;
 }
 
 static void
@@ -2385,13 +2409,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->section_size = MAJOR_SECTION_SIZE;
 
        concurrent_mark = is_concurrent;
-       if (is_concurrent) {
-               collector->is_concurrent = TRUE;
+       collector->is_concurrent = is_concurrent;
+       collector->needs_thread_pool = is_concurrent || concurrent_sweep;
+       if (is_concurrent)
                collector->want_synchronous_collection = &want_evacuation;
-       } else {
-               collector->is_concurrent = FALSE;
+       else
                collector->want_synchronous_collection = NULL;
-       }
        collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
        collector->supports_cardtable = TRUE;
 
@@ -2415,7 +2438,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->init_to_space = major_init_to_space;
        collector->sweep = major_sweep;
        collector->have_swept = major_have_swept;
-       collector->finish_sweeping = major_finish_sweeping;
+       collector->finish_sweeping = major_finish_sweep_checking;
        collector->free_swept_blocks = major_free_swept_blocks;
        collector->check_scan_starts = major_check_scan_starts;
        collector->dump_heap = major_dump_heap;
@@ -2428,7 +2451,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->obj_is_from_pinned_alloc = obj_is_from_pinned_alloc;
        collector->report_pinned_memory_usage = major_report_pinned_memory_usage;
        collector->get_num_major_sections = get_num_major_sections;
-       collector->get_num_major_unswept_old_sections = get_num_major_unswept_old_sections;
+       collector->get_bytes_survived_last_sweep = get_bytes_survived_last_sweep;
        collector->handle_gc_param = major_handle_gc_param;
        collector->print_gc_param_usage = major_print_gc_param_usage;
        collector->post_param_init = post_param_init;
@@ -2445,7 +2468,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        }
 
 #if !defined (FIXED_HEAP) && !defined (SGEN_PARALLEL_MARK)
-       /* FIXME: this will not work with evacuation or the split nursery. */
        if (!is_concurrent)
                collector->drain_gray_stack = drain_gray_stack;
 
@@ -2467,8 +2489,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
 #endif
 #endif
 
-       mono_mutex_init (&allocated_blocks_lock);
-
 #ifdef SGEN_HEAVY_BINARY_PROTOCOL
        mono_mutex_init (&scanned_objects_list_lock);
 #endif