[runtime] Further removed code that doubted IMT
[mono.git] / mono / metadata / sgen-marksweep.c
index 5d64eb80f69aa34a94b617da8200f3f38b57bd94..bd2d3bbbbcf946e218802a4996f4f923e7d26b9a 100644 (file)
@@ -43,6 +43,7 @@
 #include "metadata/sgen-pointer-queue.h"
 #include "metadata/sgen-pinning.h"
 #include "metadata/sgen-workers.h"
+#include "metadata/sgen-thread-pool.h"
 
 #if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
 #define MS_BLOCK_SIZE  ARCH_MIN_MS_BLOCK_SIZE
@@ -77,7 +78,7 @@
  * SWEPT           The block is fully swept.  It might or might not be in
  *                 a free list.
  *
- * NOT_SWEPT       The block might or might not contain live objects.  If
+ * MARKING         The block might or might not contain live objects.  If
  *                 we're in between an initial collection pause and the
  *                 finishing pause, the block might or might not be in a
  *                 free list.
@@ -109,7 +110,7 @@ struct _MSBlockInfo {
         * recalculating to save the space.
         */
        guint16 obj_size_index;
-       /* FIXME: reduce this */
+       /* FIXME: Reduce this - it only needs a byte. */
        volatile gint32 state;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
@@ -188,26 +189,39 @@ enum {
 static volatile int sweep_state = SWEEP_STATE_SWEPT;
 
 static gboolean concurrent_mark;
+static gboolean concurrent_sweep = TRUE;
 
 #define BLOCK_IS_TAGGED_HAS_REFERENCES(bl)     SGEN_POINTER_IS_TAGGED_1 ((bl))
 #define BLOCK_TAG_HAS_REFERENCES(bl)           SGEN_POINTER_TAG_1 ((bl))
-#define BLOCK_UNTAG_HAS_REFERENCES(bl)         SGEN_POINTER_UNTAG_1 ((bl))
 
-#define BLOCK_TAG(bl)  ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
+#define BLOCK_IS_TAGGED_CHECKING(bl)           SGEN_POINTER_IS_TAGGED_2 ((bl))
+#define BLOCK_TAG_CHECKING(bl)                 SGEN_POINTER_TAG_2 ((bl))
+
+#define BLOCK_UNTAG(bl)                                SGEN_POINTER_UNTAG_12 ((bl))
+
+#define BLOCK_TAG(bl)                          ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
 
 /* all allocated blocks in the system */
 static SgenPointerQueue allocated_blocks;
-static mono_mutex_t allocated_blocks_lock;
-
-#define LOCK_ALLOCATED_BLOCKS  mono_mutex_lock (&allocated_blocks_lock)
-#define UNLOCK_ALLOCATED_BLOCKS        mono_mutex_unlock (&allocated_blocks_lock)
 
 /* non-allocated block free-list */
 static void *empty_blocks = NULL;
 static size_t num_empty_blocks = 0;
 
-#define FOREACH_BLOCK_NO_LOCK(bl)      { size_t __index; SGEN_ASSERT (0, sgen_is_world_stopped () && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [__index]);
-#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr)    { size_t __index; SGEN_ASSERT (0, sgen_is_world_stopped () && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = allocated_blocks.data [__index]; (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); (bl) = BLOCK_UNTAG_HAS_REFERENCES ((bl));
+#define FOREACH_BLOCK_NO_LOCK_CONDITION(cond,bl) {                     \
+       size_t __index;                                                 \
+       SGEN_ASSERT (0, (cond) && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); \
+       for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { \
+               (bl) = BLOCK_UNTAG (allocated_blocks.data [__index]);
+#define FOREACH_BLOCK_NO_LOCK(bl)                                      \
+       FOREACH_BLOCK_NO_LOCK_CONDITION(sgen_is_world_stopped (), bl)
+#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr) {                  \
+       size_t __index;                                                 \
+       SGEN_ASSERT (0, sgen_is_world_stopped () && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); \
+       for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { \
+               (bl) = allocated_blocks.data [__index];                 \
+               (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
+               (bl) = BLOCK_UNTAG ((bl));
 #define END_FOREACH_BLOCK_NO_LOCK      } }
 
 static volatile size_t num_major_sections = 0;
@@ -266,8 +280,7 @@ add_scanned_object (void *ptr)
 }
 #endif
 
-static void
-sweep_block (MSBlockInfo *block);
+static gboolean sweep_block (MSBlockInfo *block);
 
 static int
 ms_find_block_obj_size_index (size_t size)
@@ -403,6 +416,13 @@ sweep_in_progress (void)
                state == SWEEP_STATE_COMPACTING;
 }
 
+static inline gboolean
+block_is_swept_or_marking (MSBlockInfo *block)
+{
+       gint32 state = block->state;
+       return state == BLOCK_STATE_SWEPT || state == BLOCK_STATE_MARKING;
+}
+
 //#define MARKSWEEP_CONSISTENCY_CHECK
 
 #ifdef MARKSWEEP_CONSISTENCY_CHECK
@@ -461,7 +481,7 @@ consistency_check (void)
                g_assert (num_free == 0);
 
                /* check all mark words are zero */
-               if (!sgen_concurrent_collection_in_progress () && (block->state == BLOCK_STATE_SWEPT || block->state == BLOCK_STATE_MARKING)) {
+               if (!sgen_concurrent_collection_in_progress () && block_is_swept_or_marking (block)) {
                        for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
                                g_assert (block->mark_words [i] == 0);
                }
@@ -487,6 +507,8 @@ add_free_block (MSBlockInfo * volatile *free_blocks, int size_index, MSBlockInfo
        } while (SGEN_CAS_PTR ((gpointer)&free_blocks [size_index], block, old) != old);
 }
 
+static void major_finish_sweep_checking (void);
+
 static gboolean
 ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
 {
@@ -517,8 +539,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD);
        info->state = (info->is_to_space || sgen_concurrent_collection_in_progress ()) ? BLOCK_STATE_MARKING : BLOCK_STATE_SWEPT;
-       if (sweep_in_progress ())
-               SGEN_ASSERT (0, info->state == BLOCK_STATE_SWEPT, "How do we add a new block to be swept while sweeping?");
+       SGEN_ASSERT (6, !sweep_in_progress () || info->state == BLOCK_STATE_SWEPT, "How do we add a new block to be swept while sweeping?");
        info->cardtable_mod_union = NULL;
 
        update_heap_boundaries_for_block (info);
@@ -539,9 +560,15 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
 
        add_free_block (free_blocks, size_index, info);
 
-       LOCK_ALLOCATED_BLOCKS;
+       /*
+        * This is the only place where the `allocated_blocks` array can potentially grow.
+        * We need to make sure concurrent sweep isn't running when that happens, so in that
+        * specific case we just wait for sweep to finish.
+        */
+       if (sgen_pointer_queue_will_grow (&allocated_blocks))
+               major_finish_sweep_checking ();
+
        sgen_pointer_queue_add (&allocated_blocks, BLOCK_TAG (info));
-       UNLOCK_ALLOCATED_BLOCKS;
 
        SGEN_ATOMIC_ADD_P (num_major_sections, 1);
        return TRUE;
@@ -572,8 +599,8 @@ ensure_can_access_block_free_list (MSBlockInfo *block)
                        SGEN_ASSERT (0, FALSE, "How did we get a block that's being checked from a free list?");
                        break;
                case BLOCK_STATE_NEED_SWEEPING:
-                       stat_major_blocks_lazy_swept ++;
-                       sweep_block (block);
+                       if (sweep_block (block))
+                               ++stat_major_blocks_lazy_swept;
                        break;
                case BLOCK_STATE_SWEEPING:
                        /* FIXME: do this more elegantly */
@@ -599,7 +626,7 @@ unlink_slot_from_free_list_uncontested (MSBlockInfo * volatile *free_blocks, int
        ensure_can_access_block_free_list (block);
 
        obj = block->free_list;
-       SGEN_ASSERT (0, obj, "block %p in free list had no available object to alloc from", block);
+       SGEN_ASSERT (6, obj, "block %p in free list had no available object to alloc from", block);
 
        next_free_slot = *(void**)obj;
        if (next_free_slot) {
@@ -656,7 +683,7 @@ free_object (char *obj, size_t size, gboolean pinned)
        int word, bit;
        gboolean in_free_list;
 
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");
+       SGEN_ASSERT (9, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");
 
        ensure_can_access_block_free_list (block);
        SGEN_ASSERT (9, (pinned && block->pinned) || (!pinned && !block->pinned), "free-object pinning mixup object %p pinned %d block %p pinned %d", obj, pinned, block, block->pinned);
@@ -786,12 +813,15 @@ set_sweep_state (int new, int expected)
        SGEN_ASSERT (0, success, "Could not set sweep state.");
 }
 
-static gboolean ensure_block_is_checked_for_sweeping (MSBlockInfo *block, int block_index, gboolean *have_checked);
+static gboolean ensure_block_is_checked_for_sweeping (int block_index, gboolean wait, gboolean *have_checked);
+
+static SgenThreadPoolJob * volatile sweep_job;
 
 static void
-major_finish_sweeping (void)
+major_finish_sweep_checking (void)
 {
        int block_index;
+       SgenThreadPoolJob *job;
 
  retry:
        switch (sweep_state) {
@@ -806,8 +836,7 @@ major_finish_sweeping (void)
                SGEN_ASSERT (0, FALSE, "Is there another minor collection running?");
                goto retry;
        case SWEEP_STATE_COMPACTING:
-               g_usleep (100);
-               goto retry;
+               goto wait;
        default:
                SGEN_ASSERT (0, FALSE, "Invalid sweep state.");
                break;
@@ -816,25 +845,19 @@ major_finish_sweeping (void)
        /*
         * We're running with the world stopped and the only other thread doing work is the
         * sweep thread, which doesn't add blocks to the array, so we can safely access
-        * `next_slot` without locking.
+        * `next_slot`.
         */
-       for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index) {
-               MSBlockInfo *block;
-
-               LOCK_ALLOCATED_BLOCKS;
-               block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]);
-
-               if (!block) {
-                       UNLOCK_ALLOCATED_BLOCKS;
-                       continue;
-               }
-
-               ensure_block_is_checked_for_sweeping (block, block_index, NULL);
-       }
+       for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index)
+               ensure_block_is_checked_for_sweeping (block_index, FALSE, NULL);
 
        set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_SWEEPING_AND_ITERATING);
-       while (sweep_state != SWEEP_STATE_SWEPT)
-               g_usleep (100);
+
+ wait:
+       job = sweep_job;
+       if (job)
+               sgen_thread_pool_job_wait (job);
+       SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?");
+       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?");
 }
 
 static void
@@ -845,7 +868,7 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
        gboolean pinned = flags & ITERATE_OBJECTS_PINNED;
        MSBlockInfo *block;
 
-       major_finish_sweeping ();
+       major_finish_sweep_checking ();
        FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
@@ -855,16 +878,21 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
                if (!block->pinned && !non_pinned)
                        continue;
                if (sweep && lazy_sweep) {
-                       /* FIXME: We can't just call `sweep_block` willy-nilly. */
                        sweep_block (block);
-                       SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
+                       SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
                }
 
                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
-                       /* FIXME: This condition is probably incorrect. */
-                       if (block->state != BLOCK_STATE_SWEPT && block->state != BLOCK_STATE_MARKING) {
+                       /*
+                        * We've finished sweep checking, but if we're sweeping lazily and
+                        * the flags don't require us to sweep, the block might still need
+                        * sweeping.  In that case, we need to consult the mark bits to tell
+                        * us whether an object slot is live.
+                        */
+                       if (!block_is_swept_or_marking (block)) {
                                int word, bit;
+                               SGEN_ASSERT (6, !sweep && block->state == BLOCK_STATE_NEED_SWEEPING, "Has sweeping not finished?");
                                MS_CALC_MARK_BIT (word, bit, obj);
                                if (!MS_MARK_BIT (block, word, bit))
                                        continue;
@@ -1048,7 +1076,7 @@ static void
 major_copy_or_mark_object_with_evacuation_concurrent (void **ptr, void *obj, SgenGrayQueue *queue)
 {
        SGEN_ASSERT (9, sgen_concurrent_collection_in_progress (), "Why are we scanning concurrently when there's no concurrent collection on?");
-       SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_is_worker_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
+       SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
 
        g_assert (!SGEN_OBJECT_IS_FORWARDED (obj));
 
@@ -1227,7 +1255,7 @@ sweep_block_for_size (MSBlockInfo *block, int count, int obj_size)
        }
 }
 
-static gboolean
+static inline gboolean
 try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
 {
        gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state);
@@ -1237,23 +1265,20 @@ try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state
        return success;
 }
 
-/*
- * FIXME: This only CASes to catch errors.  It's not needed for correctness.
- */
-static void
+static inline void
 set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
 {
-       gboolean success = try_set_block_state (block, new_state, expected_state);
-       SGEN_ASSERT (0, success, "Couldn't set block state");
-       SGEN_ASSERT (0, block->state == new_state, "Block state incorrect after set");
+       SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
+       block->state = new_state;
 }
 
 /*
- * sweep_block:
+ * If `block` needs sweeping, sweep it and return TRUE.  Otherwise return FALSE.
  *
- *   Traverse BLOCK, freeing and zeroing unused objects.
+ * Sweeping means iterating through the block's slots and building the free-list from the
+ * unmarked ones.  They will also be zeroed.  The mark bits will be reset.
  */
-static void
+static gboolean
 sweep_block (MSBlockInfo *block)
 {
        int count;
@@ -1262,7 +1287,7 @@ sweep_block (MSBlockInfo *block)
  retry:
        switch (block->state) {
        case BLOCK_STATE_SWEPT:
-               return;
+               return FALSE;
        case BLOCK_STATE_MARKING:
        case BLOCK_STATE_CHECKING:
                SGEN_ASSERT (0, FALSE, "How did we get to sweep a block that's being marked or being checked?");
@@ -1279,7 +1304,7 @@ sweep_block (MSBlockInfo *block)
                SGEN_ASSERT (0, FALSE, "Illegal block state");
        }
 
-       SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");
+       SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");
 
        count = MS_BLOCK_FREE / block->obj_size;
 
@@ -1312,6 +1337,8 @@ sweep_block (MSBlockInfo *block)
        mono_memory_write_barrier ();
 
        set_block_state (block, BLOCK_STATE_SWEPT, BLOCK_STATE_SWEEPING);
+
+       return TRUE;
 }
 
 static inline int
@@ -1361,74 +1388,71 @@ sweep_start (void)
 static void sweep_finish (void);
 
 /*
- * LOCKING: The allocated blocks lock must be held when entering this function.  `block`
- * must have been loaded from the array with the lock held.  This function will unlock the
- * lock.
+ * If `wait` is TRUE and the block is currently being checked, this function will wait until
+ * the checking has finished.
  *
- * Returns whether the block is still there.
+ * Returns whether the block is still there.  If `wait` is FALSE, the return value is not
+ * reliable and must not be used.
  */
 static gboolean
-ensure_block_is_checked_for_sweeping (MSBlockInfo *block, int block_index, gboolean *have_checked)
+ensure_block_is_checked_for_sweeping (int block_index, gboolean wait, gboolean *have_checked)
 {
        int count;
        gboolean have_live = FALSE;
        gboolean have_free = FALSE;
        int nused = 0;
-       int block_state = block->state;
+       int block_state;
        int i;
+       void *tagged_block;
+       MSBlockInfo *block;
+
+       SGEN_ASSERT (6, sweep_in_progress (), "Why do we call this function if there's no sweep in progress?");
 
        if (have_checked)
                *have_checked = FALSE;
 
+ retry:
+       tagged_block = *(void * volatile *)&allocated_blocks.data [block_index];
+       if (!tagged_block)
+               return FALSE;
+
+       if (BLOCK_IS_TAGGED_CHECKING (tagged_block)) {
+               if (!wait)
+                       return FALSE;
+               /* FIXME: do this more elegantly */
+               g_usleep (100);
+               goto retry;
+       }
+
+       if (SGEN_CAS_PTR (&allocated_blocks.data [block_index], BLOCK_TAG_CHECKING (tagged_block), tagged_block) != tagged_block)
+               goto retry;
+
+       block = BLOCK_UNTAG (tagged_block);
+       block_state = block->state;
+
        if (!sweep_in_progress ()) {
-               SGEN_ASSERT (0, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
+               SGEN_ASSERT (6, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
                if (!lazy_sweep)
-                       SGEN_ASSERT (0, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
+                       SGEN_ASSERT (6, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
        }
 
- retry:
        switch (block_state) {
        case BLOCK_STATE_SWEPT:
        case BLOCK_STATE_NEED_SWEEPING:
        case BLOCK_STATE_SWEEPING:
-               UNLOCK_ALLOCATED_BLOCKS;
-               return TRUE;
+               goto done;
        case BLOCK_STATE_MARKING:
-               if (sweep_in_progress ())
-                       break;
-               UNLOCK_ALLOCATED_BLOCKS;
-               return TRUE;
-       case BLOCK_STATE_CHECKING: {
-               MSBlockInfo *block_before = block;
-               /*
-                * FIXME: do this more elegantly.
-                *
-                * Also, when we're called from the sweep thread, we don't actually have to
-                * wait for it to finish, because the sweep thread doesn't use the block.
-                * However, the sweep thread needs to know when all the blocks have been
-                * checked (so it can set the global sweep state to SWEPT), so we'd have to
-                * do some kind of accounting if we don't wait.
-                */
-               UNLOCK_ALLOCATED_BLOCKS;
-               g_usleep (100);
-               LOCK_ALLOCATED_BLOCKS;
-               block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]);
-               if (!block) {
-                       UNLOCK_ALLOCATED_BLOCKS;
-                       return FALSE;
-               }
-               SGEN_ASSERT (0, block == block_before, "How did the block get exchanged for a different one?");
-               block_state = block->state;
-               goto retry;
-       }
+               break;
+       case BLOCK_STATE_CHECKING:
+               SGEN_ASSERT (0, FALSE, "We set the CHECKING bit - how can the stage be CHECKING?");
+               goto done;
        default:
                SGEN_ASSERT (0, FALSE, "Illegal block state");
                break;
        }
 
-       SGEN_ASSERT (0, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
+       SGEN_ASSERT (6, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
        set_block_state (block, BLOCK_STATE_CHECKING, BLOCK_STATE_MARKING);
-       UNLOCK_ALLOCATED_BLOCKS;
 
        if (have_checked)
                *have_checked = TRUE;
@@ -1479,44 +1503,41 @@ ensure_block_is_checked_for_sweeping (MSBlockInfo *block, int block_index, gbool
                 */
                if (have_free) {
                        MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (block->pinned, block->has_references);
-                       int index = MS_BLOCK_OBJ_SIZE_INDEX (block->obj_size);
 
                        if (!lazy_sweep)
-                               SGEN_ASSERT (0, block->free_list, "How do we not have a free list when there are free slots?");
+                               SGEN_ASSERT (6, block->free_list, "How do we not have a free list when there are free slots?");
 
-                       add_free_block (free_blocks, index, block);
+                       add_free_block (free_blocks, obj_size_index, block);
                }
 
                /* FIXME: Do we need the heap boundaries while we do nursery collections? */
                update_heap_boundaries_for_block (block);
-
-               return TRUE;
        } else {
                /*
                 * Blocks without live objects are removed from the
                 * block list and freed.
                 */
-               LOCK_ALLOCATED_BLOCKS;
-               SGEN_ASSERT (0, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
-               SGEN_ASSERT (0, BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]) == block, "How did the block move?");
-               allocated_blocks.data [block_index] = NULL;
-               UNLOCK_ALLOCATED_BLOCKS;
+               SGEN_ASSERT (6, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
+               SGEN_ASSERT (6, allocated_blocks.data [block_index] == BLOCK_TAG_CHECKING (tagged_block), "How did the block move?");
 
                binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
                ms_free_block (block);
 
                SGEN_ATOMIC_ADD_P (num_major_sections, -1);
 
-               return FALSE;
+               tagged_block = NULL;
        }
+
+ done:
+       allocated_blocks.data [block_index] = tagged_block;
+       return !!tagged_block;
 }
 
-static mono_native_thread_return_t
-sweep_loop_thread_func (void *dummy)
+static void
+sweep_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
 {
        int block_index;
        int num_blocks = num_major_sections_before_sweep;
-       int small_id = mono_thread_info_register_small_id ();
 
        SGEN_ASSERT (0, sweep_in_progress (), "Sweep thread called with wrong state");
        SGEN_ASSERT (0, num_blocks <= allocated_blocks.next_slot, "How did we lose blocks?");
@@ -1527,47 +1548,37 @@ sweep_loop_thread_func (void *dummy)
         * low to high, to avoid constantly colliding on the same blocks.
         */
        for (block_index = num_blocks - 1; block_index >= 0; --block_index) {
-               MSBlockInfo *block;
                gboolean have_checked;
 
-               LOCK_ALLOCATED_BLOCKS;
-               block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]);
-
                /*
                 * The block might have been freed by another thread doing some checking
                 * work.
                 */
-               if (!block) {
-                       UNLOCK_ALLOCATED_BLOCKS;
+               if (!ensure_block_is_checked_for_sweeping (block_index, TRUE, &have_checked))
                        ++num_major_sections_freed_in_sweep;
-                       continue;
-               }
-
-               if (block->state == BLOCK_STATE_SWEPT) {
-                       UNLOCK_ALLOCATED_BLOCKS;
-                       continue;
-               }
-
-               ensure_block_is_checked_for_sweeping (block, block_index, &have_checked);
        }
 
-       while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING))
+       while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING)) {
+               /*
+                * The main GC thread is currently iterating over the block array to help us
+                * finish the sweep.  We have already finished, but we don't want to mess up
+                * that iteration, so we just wait for it.
+                */
                g_usleep (100);
+       }
 
-       LOCK_ALLOCATED_BLOCKS;
-       for (block_index = num_blocks; block_index < allocated_blocks.next_slot; ++block_index) {
-               MSBlockInfo *block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [block_index]);
-               SGEN_ASSERT (0, block && block->state == BLOCK_STATE_SWEPT, "How did a new block to be swept get added while swept?");
+       if (SGEN_MAX_ASSERT_LEVEL >= 6) {
+               for (block_index = num_blocks; block_index < allocated_blocks.next_slot; ++block_index) {
+                       MSBlockInfo *block = BLOCK_UNTAG (allocated_blocks.data [block_index]);
+                       SGEN_ASSERT (6, block && block->state == BLOCK_STATE_SWEPT, "How did a new block to be swept get added while swept?");
+               }
        }
 
        sgen_pointer_queue_remove_nulls (&allocated_blocks);
-       UNLOCK_ALLOCATED_BLOCKS;
 
        sweep_finish ();
 
-       mono_thread_small_id_free (small_id);
-
-       return NULL;
+       sweep_job = NULL;
 }
 
 static void
@@ -1601,8 +1612,6 @@ sweep_finish (void)
        set_sweep_state (SWEEP_STATE_SWEPT, SWEEP_STATE_COMPACTING);
 }
 
-static MonoNativeThreadId sweep_loop_thread;
-
 static void
 major_sweep (void)
 {
@@ -1615,14 +1624,12 @@ major_sweep (void)
        num_major_sections_before_sweep = num_major_sections;
        num_major_sections_freed_in_sweep = 0;
 
-       if (TRUE /*concurrent_mark*/) {
-               /*
-                * FIXME: We can't create a thread while the world is stopped because it
-                * might deadlock.  `finalizer-wait.exe` exposes this.
-                */
-               mono_native_thread_create (&sweep_loop_thread, sweep_loop_thread_func, NULL);
+       SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?");
+       if (concurrent_sweep) {
+               sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob));
+               sgen_thread_pool_job_enqueue (sweep_job);
        } else {
-               sweep_loop_thread_func (NULL);
+               sweep_job_func (NULL, NULL);
        }
 }
 
@@ -1729,9 +1736,6 @@ major_start_nursery_collection (void)
 #endif
 
        old_num_major_sections = num_major_sections;
-
-       if (sweep_in_progress ())
-               g_print ("sweeping during nursery collection\n");
 }
 
 static void
@@ -1748,7 +1752,7 @@ major_start_major_collection (void)
        MSBlockInfo *block;
        int i;
 
-       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Major collection on unswept heap");
+       major_finish_sweep_checking ();
 
        /*
         * Clear the free lists for block sizes where we do evacuation.  For those block
@@ -1810,10 +1814,10 @@ compare_pointers (const void *va, const void *vb) {
  * This is called with sweep completed and the world stopped.
  */
 static void
-major_free_swept_blocks (void)
+major_free_swept_blocks (size_t allowance)
 {
-       /* FIXME: use something sensible here. */
-       size_t section_reserve = DEFAULT_NURSERY_SIZE * 2 / MS_BLOCK_SIZE;
+       /* FIXME: This is probably too much.  It's assuming all objects are small. */
+       size_t section_reserve = allowance / MS_BLOCK_SIZE;
 
        SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
 
@@ -1866,7 +1870,7 @@ major_free_swept_blocks (void)
                        for (i = 0; i < arr_length; ++i) {
                                int d = dest;
                                void *block = empty_block_arr [i];
-                               SGEN_ASSERT (0, block, "we're not shifting correctly");
+                               SGEN_ASSERT (6, block, "we're not shifting correctly");
                                if (i != dest) {
                                        empty_block_arr [dest] = block;
                                        /*
@@ -1882,7 +1886,7 @@ major_free_swept_blocks (void)
                                        continue;
                                }
 
-                               SGEN_ASSERT (0, first >= 0 && d > first, "algorithm is wrong");
+                               SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong");
 
                                if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) {
                                        first = d;
@@ -1915,9 +1919,9 @@ major_free_swept_blocks (void)
                                }
                        }
 
-                       SGEN_ASSERT (0, dest <= i && dest <= arr_length, "array length is off");
+                       SGEN_ASSERT (6, dest <= i && dest <= arr_length, "array length is off");
                        arr_length = dest;
-                       SGEN_ASSERT (0, arr_length == num_empty_blocks, "array length is off");
+                       SGEN_ASSERT (6, arr_length == num_empty_blocks, "array length is off");
 
                        num_blocks >>= 1;
                }
@@ -1926,7 +1930,7 @@ major_free_swept_blocks (void)
                rebuild_next = (void**)&empty_blocks;
                for (i = 0; i < arr_length; ++i) {
                        void *block = empty_block_arr [i];
-                       SGEN_ASSERT (0, block, "we're missing blocks");
+                       SGEN_ASSERT (6, block, "we're missing blocks");
                        *rebuild_next = block;
                        rebuild_next = (void**)block;
                }
@@ -1971,7 +1975,7 @@ major_pin_objects (SgenGrayQueue *queue)
 
        FOREACH_BLOCK_NO_LOCK (block) {
                size_t first_entry, last_entry;
-               SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT || block->state == BLOCK_STATE_MARKING, "All blocks must be swept when we're pinning.");
+               SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning.");
                sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
                                &first_entry, &last_entry);
                mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
@@ -1995,7 +1999,13 @@ major_get_used_size (void)
        gint64 size = 0;
        MSBlockInfo *block;
 
-       FOREACH_BLOCK_NO_LOCK (block) {
+       /*
+        * We're holding the GC lock, but the sweep thread might be running.  Make sure it's
+        * finished, then we can iterate over the block array.
+        */
+       major_finish_sweep_checking ();
+
+       FOREACH_BLOCK_NO_LOCK_CONDITION (TRUE, block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                void **iter;
                size += count * block->obj_size;
@@ -2043,6 +2053,12 @@ major_handle_gc_param (const char *opt)
        } else if (!strcmp (opt, "no-lazy-sweep")) {
                lazy_sweep = FALSE;
                return TRUE;
+       } else if (!strcmp (opt, "concurrent-sweep")) {
+               concurrent_sweep = TRUE;
+               return TRUE;
+       } else if (!strcmp (opt, "no-concurrent-sweep")) {
+               concurrent_sweep = FALSE;
+               return TRUE;
        }
 
        return FALSE;
@@ -2055,6 +2071,7 @@ major_print_gc_param_usage (void)
                        ""
                        "  evacuation-threshold=P (where P is a percentage, an integer in 0-100)\n"
                        "  (no-)lazy-sweep\n"
+                       "  (no-)concurrent-sweep\n"
                        );
 }
 
@@ -2067,8 +2084,7 @@ major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
        MSBlockInfo *block;
        gboolean has_references;
 
-       major_finish_sweeping ();
-
+       major_finish_sweep_checking ();
        FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
                if (has_references)
                        callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
@@ -2192,7 +2208,7 @@ scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanObjectFun
                start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
                end = start + CARD_SIZE_IN_BYTES;
 
-               if (block->state != BLOCK_STATE_SWEPT && block->state != BLOCK_STATE_MARKING)
+               if (!block_is_swept_or_marking (block))
                        sweep_block (block);
 
                HEAVY_STAT (++marked_cards);
@@ -2259,13 +2275,12 @@ major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
        if (!concurrent_mark)
                g_assert (!mod_union);
 
-       major_finish_sweeping ();
-
+       major_finish_sweep_checking ();
        FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
 #ifdef PREFETCH_CARDS
                int prefetch_index = __index + 6;
                if (prefetch_index < allocated_blocks.next_slot) {
-                       MSBlockInfo *prefetch_block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [prefetch_index]);
+                       MSBlockInfo *prefetch_block = BLOCK_UNTAG (allocated_blocks.data [prefetch_index]);
                        guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
                        PREFETCH_READ (prefetch_block);
                        PREFETCH_WRITE (prefetch_cards);
@@ -2323,7 +2338,7 @@ update_cardtable_mod_union (void)
                block->cardtable_mod_union = sgen_card_table_update_mod_union (block->cardtable_mod_union,
                                MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards);
 
-               SGEN_ASSERT (0, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
+               SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
        } END_FOREACH_BLOCK_NO_LOCK;
 }
 
@@ -2341,6 +2356,7 @@ static void
 post_param_init (SgenMajorCollector *collector)
 {
        collector->sweeps_lazily = lazy_sweep;
+       collector->needs_thread_pool = concurrent_mark || concurrent_sweep;
 }
 
 static void
@@ -2393,13 +2409,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->section_size = MAJOR_SECTION_SIZE;
 
        concurrent_mark = is_concurrent;
-       if (is_concurrent) {
-               collector->is_concurrent = TRUE;
+       collector->is_concurrent = is_concurrent;
+       collector->needs_thread_pool = is_concurrent || concurrent_sweep;
+       if (is_concurrent)
                collector->want_synchronous_collection = &want_evacuation;
-       } else {
-               collector->is_concurrent = FALSE;
+       else
                collector->want_synchronous_collection = NULL;
-       }
        collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
        collector->supports_cardtable = TRUE;
 
@@ -2423,7 +2438,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->init_to_space = major_init_to_space;
        collector->sweep = major_sweep;
        collector->have_swept = major_have_swept;
-       collector->finish_sweeping = major_finish_sweeping;
+       collector->finish_sweeping = major_finish_sweep_checking;
        collector->free_swept_blocks = major_free_swept_blocks;
        collector->check_scan_starts = major_check_scan_starts;
        collector->dump_heap = major_dump_heap;
@@ -2474,8 +2489,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
 #endif
 #endif
 
-       mono_mutex_init (&allocated_blocks_lock);
-
 #ifdef SGEN_HEAVY_BINARY_PROTOCOL
        mono_mutex_init (&scanned_objects_list_lock);
 #endif