-/*
- * sgen-marksweep.c: The Mark & Sweep major collector.
+/**
+ * \file
+ * The Mark & Sweep major collector.
*
* Author:
* Mark Probst <mark.probst@gmail.com>
#include "mono/sgen/sgen-thread-pool.h"
#include "mono/sgen/sgen-client.h"
#include "mono/utils/mono-memory-model.h"
+#include "mono/utils/mono-proclib.h"
-#if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
-#define MS_BLOCK_SIZE ARCH_MIN_MS_BLOCK_SIZE
-#define MS_BLOCK_SIZE_SHIFT ARCH_MIN_MS_BLOCK_SIZE_SHIFT
-#else
-#define MS_BLOCK_SIZE_SHIFT 14 /* INT FASTENABLE */
-#define MS_BLOCK_SIZE (1 << MS_BLOCK_SIZE_SHIFT)
-#endif
-#define MAJOR_SECTION_SIZE MS_BLOCK_SIZE
-#define CARDS_PER_BLOCK (MS_BLOCK_SIZE / CARD_SIZE_IN_BYTES)
+static int ms_block_size;
+
+/*
+ * Blocks must be at least this size, meaning that if we detect a
+ * page size lower than this, we'll use this instead.
+ */
+#define MS_BLOCK_SIZE_MIN (1024 * 16)
+
+#define CARDS_PER_BLOCK (ms_block_size / CARD_SIZE_IN_BYTES)
/*
* Don't allocate single blocks, but alloc a contingent of this many
*/
#define MS_BLOCK_ALLOC_NUM 32
+#define MS_NUM_MARK_WORDS ((ms_block_size / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8))
+
+/*
+ * Use this instead of sizeof (MSBlockInfo) since the mark_words
+ * array size depends on page size at runtime.
+ */
+#define SIZEOF_MS_BLOCK_INFO (sizeof (MSBlockInfo) + sizeof (guint32) * (MS_NUM_MARK_WORDS - MONO_ZERO_LEN_ARRAY))
+
/*
* Number of bytes before the first object in a block. At the start
 * of a block is the MSBlockHeader, then optional padding, then come
- * the objects, so this must be >= sizeof (MSBlockHeader).
+ * the objects, so this must be >= SIZEOF_MS_BLOCK_INFO.
*/
-#define MS_BLOCK_SKIP ((sizeof (MSBlockHeader) + 15) & ~15)
+#define MS_BLOCK_SKIP ((SIZEOF_MS_BLOCK_INFO + 15) & ~15)
-#define MS_BLOCK_FREE (MS_BLOCK_SIZE - MS_BLOCK_SKIP)
-
-#define MS_NUM_MARK_WORDS (MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8)
+#define MS_BLOCK_FREE (ms_block_size - MS_BLOCK_SKIP)
/*
* Blocks progress from one state to the next:
void ** volatile free_list;
MSBlockInfo * volatile next_free;
guint8 * volatile cardtable_mod_union;
- guint32 mark_words [MS_NUM_MARK_WORDS];
+ guint32 mark_words [MONO_ZERO_LEN_ARRAY];
};
#define MS_BLOCK_FOR_BLOCK_INFO(b) ((char*)(b))
#define MS_BLOCK_OBJ(b,i) ((GCObject *)(MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i)))
#define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size) (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i))
-#define MS_BLOCK_DATA_FOR_OBJ(o) ((char*)((mword)(o) & ~(mword)(MS_BLOCK_SIZE - 1)))
+#define MS_BLOCK_DATA_FOR_OBJ(o) ((char*)((mword)(o) & ~(mword)(ms_block_size - 1)))
typedef struct {
MSBlockInfo info;
} while (0)
-#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE))
+#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + ms_block_size))
#define MS_BLOCK_OBJ_SIZE_FACTOR (pow (2.0, 1.0 / 3))
SWEEP_STATE_COMPACTING
};
+/*
+ * Sweep mode: serial runs the sweep inline on the collection path,
+ * concurrent hands it off to a thread-pool job. The values are booleans
+ * so a mode can be stored directly in the `concurrent_sweep` flag.
+ */
+typedef enum {
+	SGEN_SWEEP_SERIAL = FALSE,
+	SGEN_SWEEP_CONCURRENT = TRUE,
+} SgenSweepMode;
+
static volatile int sweep_state = SWEEP_STATE_SWEPT;
static gboolean concurrent_mark;
-static gboolean concurrent_sweep = TRUE;
+static gboolean concurrent_sweep = DEFAULT_SWEEP_MODE;
+
+int sweep_pool_context = -1;
#define BLOCK_IS_TAGGED_HAS_REFERENCES(bl) SGEN_POINTER_IS_TAGGED_1 ((bl))
#define BLOCK_TAG_HAS_REFERENCES(bl) SGEN_POINTER_TAG_1 ((bl))
/* non-allocated block free-list */
static void *empty_blocks = NULL;
static size_t num_empty_blocks = 0;
+static gboolean compact_blocks = FALSE;
/*
* We can iterate the block list also while sweep is in progress but we
(bl) = BLOCK_UNTAG ((bl));
#define END_FOREACH_BLOCK_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
+/*
+ * Like FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK, but walks only the slots of
+ * allocated_blocks with index in [begin, end), skipping null entries.
+ * `hr` receives the has-references tag, `bl` the untagged block pointer.
+ * Lets parallel workers each scan a disjoint range of the block list.
+ */
+#define FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK(bl,begin,end,index,hr) { \
+	volatile gpointer *slot;	\
+	SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&allocated_blocks, begin, end, slot, index) { \
+		(bl) = (MSBlockInfo *) (*slot); \
+		if (!(bl)) \
+			continue; \
+		(hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); \
+		(bl) = BLOCK_UNTAG ((bl));
+#define END_FOREACH_BLOCK_RANGE_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; }
+
static volatile size_t num_major_sections = 0;
/*
* One free block list for each block object size. We add and remove blocks from these
#define FREE_BLOCKS_FROM(lists,p,r) (lists [((p) ? MS_BLOCK_FLAG_PINNED : 0) | ((r) ? MS_BLOCK_FLAG_REFS : 0)])
#define FREE_BLOCKS(p,r) (FREE_BLOCKS_FROM (free_block_lists, (p), (r)))
+#define FREE_BLOCKS_LOCAL(p,r) (FREE_BLOCKS_FROM (((MSBlockInfo***)mono_native_tls_get_value (worker_block_free_list_key)), (p), (r)))
#define MS_BLOCK_OBJ_SIZE_INDEX(s) \
(((s)+7)>>3 < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES ? \
ms_find_block_obj_size_index ((s)))
static void*
-major_alloc_heap (mword nursery_size, mword nursery_align, int the_nursery_bits)
+major_alloc_heap (mword nursery_size, mword nursery_align)
{
char *start;
if (nursery_align)
static void
update_heap_boundaries_for_block (MSBlockInfo *block)
{
- sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE);
+ sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size);
}
/*
*/
int alloc_num = MS_BLOCK_ALLOC_NUM;
for (;;) {
- p = (char *)sgen_alloc_os_memory_aligned (MS_BLOCK_SIZE * alloc_num, MS_BLOCK_SIZE,
+ p = (char *)sgen_alloc_os_memory_aligned (ms_block_size * alloc_num, ms_block_size,
(SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE),
alloc_num == 1 ? "major heap section" : NULL, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
if (p)
empty = empty_blocks;
*(void**)block = empty;
} while (SGEN_CAS_PTR ((gpointer*)&empty_blocks, block, empty) != empty);
- p += MS_BLOCK_SIZE;
+ p += ms_block_size;
}
SGEN_ATOMIC_ADD_P (num_empty_blocks, alloc_num);
*(void**)block = NULL;
- g_assert (!((mword)block & (MS_BLOCK_SIZE - 1)));
+ g_assert (!((mword)block & (ms_block_size - 1)));
return block;
}
void *empty;
char *block = MS_BLOCK_FOR_BLOCK_INFO (info);
- sgen_memgov_release_space (MS_BLOCK_SIZE, SPACE_MAJOR);
+ sgen_memgov_release_space (ms_block_size, SPACE_MAJOR);
if (info->cardtable_mod_union)
- sgen_card_table_free_mod_union (info->cardtable_mod_union, block, MS_BLOCK_SIZE);
- memset (block, 0, MS_BLOCK_SIZE);
+ sgen_card_table_free_mod_union (info->cardtable_mod_union, block, ms_block_size);
+ memset (block, 0, ms_block_size);
do {
empty = empty_blocks;
SGEN_ATOMIC_ADD_P (num_empty_blocks, 1);
- binary_protocol_block_free (block, MS_BLOCK_SIZE);
+ binary_protocol_block_free (block, ms_block_size);
}
static gboolean
char *obj_start;
int i;
- if (!sgen_memgov_try_alloc_space (MS_BLOCK_SIZE, SPACE_MAJOR))
+ if (!sgen_memgov_try_alloc_space (ms_block_size, SPACE_MAJOR))
return FALSE;
info = (MSBlockInfo*)ms_get_empty_block ();
update_heap_boundaries_for_block (info);
- binary_protocol_block_alloc (info, MS_BLOCK_SIZE);
+ binary_protocol_block_alloc (info, ms_block_size);
/* build free list */
obj_start = MS_BLOCK_FOR_BLOCK_INFO (info) + MS_BLOCK_SKIP;
MSBlockInfo *block;
FOREACH_BLOCK_NO_LOCK (block) {
- if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) {
+ if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) {
int count = MS_BLOCK_FREE / block->obj_size;
int i;
return alloc_obj (vtable, size, FALSE, has_references);
}
+/*
+ * Allocate a major-heap object of `size` bytes from a worker thread.
+ * Returns NULL if no block could be obtained from the OS.
+ *
+ * This can only be called by sgen workers. While this is called we assume
+ * that no other thread is accessing the block free lists. The world should
+ * be stopped and the gc thread should be waiting for workers to finish.
+ */
+static GCObject*
+major_alloc_object_par (GCVTable vtable, size_t size, gboolean has_references)
+{
+	int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
+	MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (FALSE, has_references);
+	MSBlockInfo **free_blocks_local = FREE_BLOCKS_LOCAL (FALSE, has_references);
+	void *obj;
+
+	/* Fast path: this worker already owns a block of the right size class. */
+	if (free_blocks_local [size_index]) {
+get_slot:
+		obj = unlink_slot_from_free_list_uncontested (free_blocks_local, size_index);
+	} else {
+		MSBlockInfo *block;
+get_block:
+		block = free_blocks [size_index];
+		if (!block) {
+			if (G_UNLIKELY (!ms_alloc_block (size_index, FALSE, has_references)))
+				return NULL;
+			goto get_block;
+		} else {
+			MSBlockInfo *next_free = block->next_free;
+			/*
+			 * Once a block is removed from the main list, it cannot return on the list until
+			 * all the workers are finished and sweep is starting. This means we don't need
+			 * to account for ABA problems.
+			 */
+			if (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], next_free, block) != block)
+				goto get_block;
+			/* We won the CAS; move the block onto this worker's private list. */
+			block->next_free = free_blocks_local [size_index];
+			free_blocks_local [size_index] = block;
+
+			goto get_slot;
+		}
+	}
+
+	/* FIXME: assumes object layout */
+	*(GCVTable*)obj = vtable;
+
+	/* FIXME is it worth CAS-ing here */
+	total_allocated_major += block_obj_sizes [size_index];
+
+	return (GCObject *)obj;
+}
+
/*
* We're not freeing the block if it's empty. We leave that work for
* the next major collection.
wait:
job = sweep_job;
if (job)
- sgen_thread_pool_job_wait (job);
+ sgen_thread_pool_job_wait (sweep_pool_context, job);
SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?");
SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?");
}
int idx;
char *obj;
- if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= object))
+ if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= object))
continue;
idx = MS_BLOCK_OBJ_INDEX (object, block);
int w, b;
gboolean marked;
- if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= ptr))
+ if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= ptr))
continue;
SGEN_LOG (0, "major-ptr (block %p sz %d pin %d ref %d)\n",
return mod_union;
else if (!allocate)
return NULL;
- mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
+ mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
other = (guint8 *)SGEN_CAS_PTR ((gpointer*)&block->cardtable_mod_union, mod_union, NULL);
if (!other) {
SGEN_ASSERT (0, block->cardtable_mod_union == mod_union, "Why did CAS not replace?");
return mod_union;
}
- sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
+ sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
return other;
}
if (!MS_MARK_BIT ((block), __word, __bit)) { \
MS_SET_MARK_BIT ((block), __word, __bit); \
if (sgen_gc_descr_has_references (desc)) \
- GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
+ GRAY_OBJECT_ENQUEUE_SERIAL ((queue), (obj), (desc)); \
binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
INC_NUM_MAJOR_OBJECTS_MARKED (); \
} \
MS_SET_MARK_BIT_PAR ((block), __word, __bit, first); \
if (first) { \
if (sgen_gc_descr_has_references (desc)) \
- GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
+ GRAY_OBJECT_ENQUEUE_PARALLEL ((queue), (obj), (desc)); \
binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
INC_NUM_MAJOR_OBJECTS_MARKED (); \
} \
MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
}
+#define COPY_OR_MARK_PARALLEL
#include "sgen-major-copy-object.h"
static long long
#endif
/* gcc 4.2.1 from xcode4 crashes on sgen_card_table_get_card_address () when this is enabled */
-#if defined(PLATFORM_MACOSX)
+#if defined(HOST_DARWIN)
#if MONO_GNUC_VERSION <= 40300
#undef PREFETCH_CARDS
#endif
#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_no_evacuation
#include "sgen-marksweep-drain-gray-stack.h"
+#define COPY_OR_MARK_PARALLEL
+#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_par_no_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_par_no_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_par_no_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
+
#define COPY_OR_MARK_WITH_EVACUATION
#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_with_evacuation
#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_with_evacuation
#define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_with_evacuation
#include "sgen-marksweep-drain-gray-stack.h"
+#define COPY_OR_MARK_PARALLEL
+#define COPY_OR_MARK_WITH_EVACUATION
+#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_par_with_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_par_with_evacuation
+#define SCAN_VTYPE_FUNCTION_NAME major_scan_vtype_par_with_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_par_with_evacuation
+#define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_par_with_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
+
#define COPY_OR_MARK_CONCURRENT
#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_concurrent_no_evacuation
#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_concurrent_no_evacuation
return drain_gray_stack_no_evacuation (queue);
}
+/*
+ * Drain the gray queue using the parallel mark/copy functions, picking
+ * the evacuating or non-evacuating variant to match the current
+ * collection's evacuation decision.
+ */
+static gboolean
+drain_gray_stack_par (SgenGrayQueue *queue)
+{
+	if (major_is_evacuating ())
+		return drain_gray_stack_par_with_evacuation (queue);
+	else
+		return drain_gray_stack_par_no_evacuation (queue);
+}
+
static gboolean
drain_gray_stack_concurrent (SgenGrayQueue *queue)
{
major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
}
+/*
+ * Canonical (GCObject **) wrapper around the parallel copy-or-mark
+ * function. Installed as the copy_or_mark_object op for the finishing
+ * pause of a parallel concurrent collection, where evacuation may be
+ * enabled — hence the _with_evacuation variant.
+ */
+static void
+major_copy_or_mark_object_concurrent_par_finish_canonical (GCObject **ptr, SgenGrayQueue *queue)
+{
+	major_copy_or_mark_object_par_with_evacuation (ptr, *ptr, queue);
+}
+
static void
mark_pinned_objects_in_block (MSBlockInfo *block, size_t first_entry, size_t last_entry, SgenGrayQueue *queue)
{
sweep_block_for_size (MSBlockInfo *block, int count, int obj_size)
{
int obj_index;
+ void *obj = MS_BLOCK_OBJ_FOR_SIZE (block, 0, obj_size);
- for (obj_index = 0; obj_index < count; ++obj_index) {
+ for (obj_index = 0; obj_index < count; ++obj_index, obj = (void*)((mword)obj + obj_size)) {
int word, bit;
- void *obj = MS_BLOCK_OBJ_FOR_SIZE (block, obj_index, obj_size);
MS_CALC_MARK_BIT (word, bit, obj);
if (MS_MARK_BIT (block, word, bit)) {
gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state);
gboolean success = old_state == expected_state;
if (success)
- binary_protocol_block_set_state (block, MS_BLOCK_SIZE, old_state, new_state);
+ binary_protocol_block_set_state (block, ms_block_size, old_state, new_state);
return success;
}
{
SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
block->state = new_state;
- binary_protocol_block_set_state (block, MS_BLOCK_SIZE, expected_state, new_state);
+ binary_protocol_block_set_state (block, ms_block_size, expected_state, new_state);
}
/*
}
}
+/*
+ * Drop a worker's cached free blocks, but only for the object size
+ * classes being evacuated this collection; other size classes keep
+ * their cached block. The assert checks that worker-held blocks are
+ * not linked at this point. No-op for workers that never allocated.
+ */
+static void
+sgen_worker_clear_free_block_lists_evac (WorkerData *worker)
+{
+	int i, j;
+
+	if (!worker->free_block_lists)
+		return;
+
+	for (i = 0; i < MS_BLOCK_TYPE_MAX; i++) {
+		for (j = 0; j < num_block_obj_sizes; j++) {
+			if (((MSBlockInfo***) worker->free_block_lists) [i][j])
+				SGEN_ASSERT (0, !((MSBlockInfo***) worker->free_block_lists) [i][j]->next_free, "Why do we have linked free blocks on the workers");
+
+			if (evacuate_block_obj_sizes [j])
+				((MSBlockInfo***) worker->free_block_lists) [i][j] = NULL;
+		}
+	}
+}
+
static void
sweep_start (void)
{
free_blocks [j] = NULL;
}
- sgen_workers_foreach (sgen_worker_clear_free_block_lists);
+ sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists);
+ sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists);
+
+ compact_blocks = TRUE;
}
static void sweep_finish (void);
*/
if (concurrent_sweep && lazy_sweep) {
sweep_blocks_job = sgen_thread_pool_job_alloc ("sweep_blocks", sweep_blocks_job_func, sizeof (SgenThreadPoolJob));
- sgen_thread_pool_job_enqueue (sweep_blocks_job);
+ sgen_thread_pool_job_enqueue (sweep_pool_context, sweep_blocks_job);
}
sweep_finish ();
SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?");
if (concurrent_sweep) {
sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob));
- sgen_thread_pool_job_enqueue (sweep_job);
+ sgen_thread_pool_job_enqueue (sweep_pool_context, sweep_job);
} else {
sweep_job_func (NULL, NULL);
}
#endif
old_num_major_sections = num_major_sections;
+
+ /* Compact the block list if it hasn't been compacted in a while and nobody is using it */
+ if (compact_blocks && !sweep_in_progress () && !sweep_blocks_job && !sgen_concurrent_collection_in_progress ()) {
+ /*
+ * We support null elements in the array but do regular compaction to avoid
+ * excessive traversal of the array and to facilitate splitting into well
+ * balanced sections for parallel modes. We compact as soon as possible after
+ * sweep.
+ */
+ sgen_array_list_remove_nulls (&allocated_blocks);
+ compact_blocks = FALSE;
+ }
}
static void
sgen_evacuation_freelist_blocks (&free_block_lists [MS_BLOCK_FLAG_REFS][i], i);
}
+ /* We expect workers to have very few blocks on the freelist, just evacuate them */
+ sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists_evac);
+ sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists_evac);
+
if (lazy_sweep && concurrent_sweep) {
/*
* sweep_blocks_job is created before sweep_finish, which we wait for above
*/
SgenThreadPoolJob *job = sweep_blocks_job;
if (job)
- sgen_thread_pool_job_wait (job);
+ sgen_thread_pool_job_wait (sweep_pool_context, job);
}
if (lazy_sweep && !concurrent_sweep)
{
SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
-#ifdef TARGET_WIN32
+#if defined(HOST_WIN32) || defined(HOST_ORBIS)
/*
* sgen_free_os_memory () asserts in mono_vfree () because windows doesn't like freeing the middle of
* a VirtualAlloc ()-ed block.
SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong");
- if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) {
+ if ((char*)block != ((char*)empty_block_arr [d-1]) + ms_block_size) {
first = d;
continue;
}
* we're iterating.
*/
int j;
- sgen_free_os_memory (empty_block_arr [first], MS_BLOCK_SIZE * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
+ sgen_free_os_memory (empty_block_arr [first], ms_block_size * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
for (j = first; j <= d; ++j)
empty_block_arr [j] = NULL;
dest = first;
while (num_empty_blocks > section_reserve) {
void *next = *(void**)empty_blocks;
- sgen_free_os_memory (empty_blocks, MS_BLOCK_SIZE, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
+ sgen_free_os_memory (empty_blocks, ms_block_size, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP);
empty_blocks = next;
/*
* Needs not be atomic because this is running
FOREACH_BLOCK_NO_LOCK (block) {
size_t first_entry, last_entry;
SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning.");
- sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
+ sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size,
&first_entry, &last_entry);
mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
} END_FOREACH_BLOCK_NO_LOCK;
get_bytes_survived_last_sweep (void)
{
SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep");
- return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * MS_BLOCK_SIZE;
+ return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * ms_block_size;
}
static gboolean
FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
if (has_references)
- callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
+ callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
} END_FOREACH_BLOCK_NO_LOCK;
}
major_finish_sweep_checking ();
FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
if (has_references)
- callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
+ callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size);
} END_FOREACH_BLOCK_NO_LOCK;
}
initial_skip_card (guint8 *card_data)
{
mword *cards = (mword*)card_data;
- mword card;
+ mword card = 0;
int i;
for (i = 0; i < CARD_WORDS_PER_BLOCK; ++i) {
card = cards [i];
#define MS_BLOCK_OBJ_INDEX_FAST(o,b,os) (((char*)(o) - ((b) + MS_BLOCK_SKIP)) / (os))
#define MS_BLOCK_OBJ_FAST(b,os,i) ((b) + MS_BLOCK_SKIP + (os) * (i))
-#define MS_OBJ_ALLOCED_FAST(o,b) (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + MS_BLOCK_SIZE))
+#define MS_OBJ_ALLOCED_FAST(o,b) (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + ms_block_size))
static void
scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, ScanCopyContext ctx)
{
SgenGrayQueue *queue = ctx.queue;
ScanObjectFunc scan_func = ctx.ops->scan_object;
+ /*
+ * FIXME: On systems with very large pages, we allocate fairly large
+ * arrays on the stack here. This shouldn't be a problem once block
+ * size is no longer required to be a multiple of the system page size.
+ */
#ifndef SGEN_HAVE_OVERLAPPING_CARDS
- guint8 cards_copy [CARDS_PER_BLOCK];
+ guint8 *cards_copy = alloca (sizeof (guint8) * CARDS_PER_BLOCK);
#endif
- guint8 cards_preclean [CARDS_PER_BLOCK];
+ guint8 *cards_preclean = alloca (sizeof (guint8) * CARDS_PER_BLOCK);
gboolean small_objects;
int block_obj_size;
char *block_start;
}
static void
-major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
+major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count, int block_count)
{
MSBlockInfo *block;
gboolean has_references, was_sweeping, skip_scan;
+ int first_block, last_block, index;
+
+ /*
+ * The last_block's index is at least (num_major_sections - 1) since we
+ * can have nulls in the allocated_blocks list. The last worker will
+ * scan the left-overs of the list. We expect few null entries in the
+ * allocated_blocks list, therefore using num_major_sections for computing
+ * block_count shouldn't affect work distribution.
+ */
+ first_block = block_count * job_index;
+ if (job_index == job_split_count - 1)
+ last_block = allocated_blocks.next_slot;
+ else
+ last_block = block_count * (job_index + 1);
if (!concurrent_mark)
g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);
was_sweeping = sweep_in_progress ();
binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
- FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
- if (__index % job_split_count != job_index)
- continue;
+ FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK (block, first_block, last_block, index, has_references) {
#ifdef PREFETCH_CARDS
- int prefetch_index = __index + 6 * job_split_count;
+ int prefetch_index = index + 6;
if (prefetch_index < allocated_blocks.next_slot) {
MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
PREFETCH_READ (prefetch_block);
}
}
#endif
-
if (!has_references)
continue;
skip_scan = FALSE;
* sweep start since we are in a nursery collection. Also avoid CAS-ing
*/
if (sweep_in_progress ()) {
- skip_scan = !ensure_block_is_checked_for_sweeping (__index, TRUE, NULL);
+ skip_scan = !ensure_block_is_checked_for_sweeping (index, TRUE, NULL);
} else if (was_sweeping) {
/* Recheck in case sweep finished after dereferencing the slot */
- skip_scan = *sgen_array_list_get_slot (&allocated_blocks, __index) == 0;
+ skip_scan = *sgen_array_list_get_slot (&allocated_blocks, index) == 0;
}
}
}
if (!skip_scan)
scan_card_table_for_block (block, scan_type, ctx);
- } END_FOREACH_BLOCK_NO_LOCK;
+ } END_FOREACH_BLOCK_RANGE_NO_LOCK;
binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
}
if (has_dirty_cards) {
size_t num_cards;
guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE);
- sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards);
+ sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size, &num_cards);
SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
}
} END_FOREACH_BLOCK_NO_LOCK;
post_param_init (SgenMajorCollector *collector)
{
collector->sweeps_lazily = lazy_sweep;
- collector->needs_thread_pool = concurrent_mark || concurrent_sweep;
}
-/* We are guaranteed to be called by the worker in question */
+/*
+ * We are guaranteed to be called by the worker in question.
+ * This provides initialization for threads that plan to do
+ * parallel object allocation. We need to store these lists
+ * in additional data structures so we can traverse them
+ * at major/sweep start.
+ */
static void
-sgen_worker_init_callback (gpointer worker_untyped)
+sgen_init_block_free_lists (gpointer *list_p)
{
int i;
- WorkerData *worker = (WorkerData*) worker_untyped;
- MSBlockInfo ***worker_free_blocks = (MSBlockInfo ***) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo**) * MS_BLOCK_TYPE_MAX, INTERNAL_MEM_MS_TABLES, TRUE);
+ MSBlockInfo ***worker_free_blocks = (MSBlockInfo ***) mono_native_tls_get_value (worker_block_free_list_key);
+
+ /*
+ * For simplification, a worker thread uses the same free block lists,
+ * regardless of the context it is part of (major/minor).
+ */
+ if (worker_free_blocks) {
+ *list_p = (gpointer)worker_free_blocks;
+ return;
+ }
+
+ worker_free_blocks = (MSBlockInfo ***) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo**) * MS_BLOCK_TYPE_MAX, INTERNAL_MEM_MS_TABLES, TRUE);
for (i = 0; i < MS_BLOCK_TYPE_MAX; i++)
worker_free_blocks [i] = (MSBlockInfo **) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
- worker->free_block_lists = worker_free_blocks;
+ *list_p = (gpointer)worker_free_blocks;
mono_native_tls_set_value (worker_block_free_list_key, worker_free_blocks);
}
{
int i;
- sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, sizeof (MSBlockInfo));
+ ms_block_size = mono_pagesize ();
+
+ if (ms_block_size < MS_BLOCK_SIZE_MIN)
+ ms_block_size = MS_BLOCK_SIZE_MIN;
+
+ sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, SIZEOF_MS_BLOCK_INFO);
+
+ if (mono_cpu_count () <= 1)
+ is_parallel = FALSE;
num_block_obj_sizes = ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, NULL);
block_obj_sizes = (int *)sgen_alloc_internal_dynamic (sizeof (int) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES * 8; ++i)
g_assert (MS_BLOCK_OBJ_SIZE_INDEX (i) == ms_find_block_obj_size_index (i));
+ /* We can do this because we always init the minor before the major */
+ if (is_parallel || sgen_get_minor_collector ()->is_parallel)
+ mono_native_tls_alloc (&worker_block_free_list_key, NULL);
+
mono_counters_register ("# major blocks allocated", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced);
mono_counters_register ("# major blocks freed", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed);
mono_counters_register ("# major blocks lazy swept", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_lazy_swept);
mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual);
mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal);
- collector->section_size = MAJOR_SECTION_SIZE;
+ collector->section_size = ms_block_size;
concurrent_mark = is_concurrent;
collector->is_concurrent = is_concurrent;
collector->is_parallel = is_parallel;
- collector->needs_thread_pool = is_concurrent || concurrent_sweep;
collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
collector->supports_cardtable = TRUE;
collector->alloc_degraded = major_alloc_degraded;
collector->alloc_object = major_alloc_object;
+ collector->alloc_object_par = major_alloc_object_par;
collector->free_pinned_object = free_pinned_object;
collector->iterate_objects = major_iterate_objects;
collector->free_non_pinned_object = major_free_non_pinned_object;
collector->is_valid_object = major_is_valid_object;
collector->describe_pointer = major_describe_pointer;
collector->count_cards = major_count_cards;
+ collector->init_block_free_lists = sgen_init_block_free_lists;
collector->major_ops_serial.copy_or_mark_object = major_copy_or_mark_object_canonical;
collector->major_ops_serial.scan_object = major_scan_object_with_evacuation;
+ collector->major_ops_serial.scan_ptr_field = major_scan_ptr_field_with_evacuation;
collector->major_ops_serial.drain_gray_stack = drain_gray_stack;
if (is_concurrent) {
collector->major_ops_concurrent_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_canonical;
collector->major_ops_conc_par_start.scan_ptr_field = major_scan_ptr_field_concurrent_par_with_evacuation;
collector->major_ops_conc_par_start.drain_gray_stack = drain_gray_stack_concurrent_par;
- /* FIXME use parallel obj ops */
- collector->major_ops_conc_par_finish = collector->major_ops_concurrent_finish;
-
- collector->worker_init_cb = sgen_worker_init_callback;
-
- mono_native_tls_alloc (&worker_block_free_list_key, NULL);
+ collector->major_ops_conc_par_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_par_finish_canonical;
+ collector->major_ops_conc_par_finish.scan_object = major_scan_object_par_with_evacuation;
+ collector->major_ops_conc_par_finish.scan_vtype = major_scan_vtype_par_with_evacuation;
+ collector->major_ops_conc_par_finish.scan_ptr_field = major_scan_ptr_field_par_with_evacuation;
+ collector->major_ops_conc_par_finish.drain_gray_stack = drain_gray_stack_par;
}
}
SGEN_ASSERT (0, SGEN_MAX_SMALL_OBJ_SIZE <= MS_BLOCK_FREE / 2, "MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2");
/*cardtable requires major pages to be 8 cards aligned*/
- g_assert ((MS_BLOCK_SIZE % (8 * CARD_SIZE_IN_BYTES)) == 0);
+ g_assert ((ms_block_size % (8 * CARD_SIZE_IN_BYTES)) == 0);
+
+ if (is_concurrent && is_parallel)
+ sgen_workers_create_context (GENERATION_OLD, mono_cpu_count ());
+ else if (is_concurrent)
+ sgen_workers_create_context (GENERATION_OLD, 1);
+
+ if (concurrent_sweep)
+ sweep_pool_context = sgen_thread_pool_create_context (1, NULL, NULL, NULL, NULL, NULL);
}
void