Merge branch 'feature-concurrent-sweep'
author: Mark Probst <mark.probst@gmail.com>
Thu, 9 Apr 2015 23:13:32 +0000 (16:13 -0700)
committer: Mark Probst <mark.probst@gmail.com>
Thu, 9 Apr 2015 23:13:32 +0000 (16:13 -0700)
19 files changed:
man/mono.1
mono/metadata/Makefile.am
mono/metadata/sgen-gc.c
mono/metadata/sgen-gc.h
mono/metadata/sgen-internal.c
mono/metadata/sgen-marksweep.c
mono/metadata/sgen-memory-governor.c
mono/metadata/sgen-memory-governor.h
mono/metadata/sgen-os-mach.c
mono/metadata/sgen-pointer-queue.c
mono/metadata/sgen-pointer-queue.h
mono/metadata/sgen-protocol-def.h
mono/metadata/sgen-protocol.c
mono/metadata/sgen-stw.c
mono/metadata/sgen-tagged-pointer.h
mono/metadata/sgen-thread-pool.c [new file with mode: 0644]
mono/metadata/sgen-thread-pool.h [new file with mode: 0644]
mono/metadata/sgen-workers.c
mono/metadata/sgen-workers.h

index a3ab46153f59aa7bf5c3687bfb1a8019ec63f9c0..fc49f3b90653115d573561dd3f39bf3fe6b2fd96 100644 (file)
@@ -1129,9 +1129,16 @@ to 100 percent.  A value of 0 turns evacuation off.
 .TP
 \fB(no-)lazy-sweep\fR
 Enables or disables lazy sweep for the Mark&Sweep collector.  If
-enabled, the sweep phase of the garbage collection is done piecemeal
-whenever the need arises, typically during nursery collections.  Lazy
-sweeping is enabled by default.
+enabled, the sweeping of individual major heap blocks is done
+piecemeal whenever the need arises, typically during nursery
+collections.  Lazy sweeping is enabled by default.
+.TP
+\fB(no-)concurrent-sweep\fR
+Enables or disables concurrent sweep for the Mark&Sweep collector.  If
+enabled, the iteration of all major blocks to determine which ones can
+be freed and which ones have to be kept and swept, is done
+concurrently with the running program.  Concurrent sweeping is enabled
+by default.
 .TP
 \fBstack-mark=\fImark-mode\fR
 Specifies how application threads should be scanned. Options are
index 8b22bbb646166414191708275b70ea0173f866b6..1241a7a079e60d46d531e556a58f8ddd0bde10d0 100644 (file)
@@ -291,6 +291,8 @@ sgen_sources = \
        sgen-layout-stats.h     \
        sgen-qsort.c    \
        sgen-qsort.h    \
+       sgen-thread-pool.c      \
+       sgen-thread-pool.h      \
        sgen-tagged-pointer.h
 
 libmonoruntime_la_SOURCES = $(common_sources) $(gc_dependent_sources) $(null_gc_sources) $(boehm_sources)
index 532e1f74aa5287eba0508c99748a59418f28face..0c841e28c040aff3738129908e147cc02e101f8a 100644 (file)
@@ -328,19 +328,15 @@ static guint64 time_minor_pre_collection_fragment_clear = 0;
 static guint64 time_minor_pinning = 0;
 static guint64 time_minor_scan_remsets = 0;
 static guint64 time_minor_scan_pinned = 0;
-static guint64 time_minor_scan_registered_roots = 0;
-static guint64 time_minor_scan_thread_data = 0;
+static guint64 time_minor_scan_roots = 0;
 static guint64 time_minor_finish_gray_stack = 0;
 static guint64 time_minor_fragment_creation = 0;
 
 static guint64 time_major_pre_collection_fragment_clear = 0;
 static guint64 time_major_pinning = 0;
 static guint64 time_major_scan_pinned = 0;
-static guint64 time_major_scan_registered_roots = 0;
-static guint64 time_major_scan_thread_data = 0;
-static guint64 time_major_scan_alloc_pinned = 0;
-static guint64 time_major_scan_finalized = 0;
-static guint64 time_major_scan_big_objects = 0;
+static guint64 time_major_scan_roots = 0;
+static guint64 time_major_scan_mod_union = 0;
 static guint64 time_major_finish_gray_stack = 0;
 static guint64 time_major_free_bigobjs = 0;
 static guint64 time_major_los_sweep = 0;
@@ -593,12 +589,7 @@ gray_queue_redirect (SgenGrayQueue *queue)
 
        if (wake) {
                g_assert (concurrent_collection_in_progress);
-               if (sgen_workers_have_started ()) {
-                       sgen_workers_ensure_awake ();
-               } else {
-                       if (concurrent_collection_in_progress)
-                               g_assert (current_collection_generation == -1);
-               }
+               sgen_workers_ensure_awake ();
        }
 }
 
@@ -748,6 +739,8 @@ mono_gc_clear_domain (MonoDomain * domain)
                sgen_perform_collection (0, GENERATION_OLD, "clear domain", TRUE);
        g_assert (!concurrent_collection_in_progress);
 
+       major_collector.finish_sweeping ();
+
        sgen_process_fin_stage_entries ();
        sgen_process_dislink_stage_entries ();
 
@@ -1902,19 +1895,14 @@ init_stats (void)
        mono_counters_register ("Minor pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_pinning);
        mono_counters_register ("Minor scan remembered set", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_remsets);
        mono_counters_register ("Minor scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_pinned);
-       mono_counters_register ("Minor scan registered roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_registered_roots);
-       mono_counters_register ("Minor scan thread data", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_thread_data);
-       mono_counters_register ("Minor finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_finish_gray_stack);
+       mono_counters_register ("Minor scan roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_scan_roots);
        mono_counters_register ("Minor fragment creation", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_minor_fragment_creation);
 
        mono_counters_register ("Major fragment clear", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_pre_collection_fragment_clear);
        mono_counters_register ("Major pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_pinning);
        mono_counters_register ("Major scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_pinned);
-       mono_counters_register ("Major scan registered roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_registered_roots);
-       mono_counters_register ("Major scan thread data", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_thread_data);
-       mono_counters_register ("Major scan alloc_pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_alloc_pinned);
-       mono_counters_register ("Major scan finalized", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_finalized);
-       mono_counters_register ("Major scan big objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_big_objects);
+       mono_counters_register ("Major scan roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_roots);
+       mono_counters_register ("Major scan mod union", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union);
        mono_counters_register ("Major finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_finish_gray_stack);
        mono_counters_register ("Major free big objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_free_bigobjs);
        mono_counters_register ("Major LOS sweep", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_los_sweep);
@@ -1992,66 +1980,75 @@ sgen_concurrent_collection_in_progress (void)
 }
 
 static void
-job_remembered_set_scan (WorkerData *worker_data, void *dummy)
+job_remembered_set_scan (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
+       WorkerData *worker_data = worker_data_untyped;
        remset.scan_remsets (sgen_workers_get_job_gray_queue (worker_data));
 }
 
-typedef struct
-{
+typedef struct {
+       SgenThreadPoolJob job;
        CopyOrMarkObjectFunc copy_or_mark_func;
        ScanObjectFunc scan_func;
        char *heap_start;
        char *heap_end;
        int root_type;
-} ScanFromRegisteredRootsJobData;
+} ScanFromRegisteredRootsJob;
 
 static void
-job_scan_from_registered_roots (WorkerData *worker_data, void *job_data_untyped)
+job_scan_from_registered_roots (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       ScanFromRegisteredRootsJobData *job_data = job_data_untyped;
+       WorkerData *worker_data = worker_data_untyped;
+       ScanFromRegisteredRootsJob *job_data = (ScanFromRegisteredRootsJob*)job;
        ScanCopyContext ctx = { job_data->scan_func, job_data->copy_or_mark_func,
                sgen_workers_get_job_gray_queue (worker_data) };
 
        scan_from_registered_roots (job_data->heap_start, job_data->heap_end, job_data->root_type, ctx);
-       sgen_free_internal_dynamic (job_data, sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA);
 }
 
-typedef struct
-{
+typedef struct {
+       SgenThreadPoolJob job;
        char *heap_start;
        char *heap_end;
-} ScanThreadDataJobData;
+} ScanThreadDataJob;
 
 static void
-job_scan_thread_data (WorkerData *worker_data, void *job_data_untyped)
+job_scan_thread_data (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       ScanThreadDataJobData *job_data = job_data_untyped;
+       WorkerData *worker_data = worker_data_untyped;
+       ScanThreadDataJob *job_data = (ScanThreadDataJob*)job;
 
        scan_thread_data (job_data->heap_start, job_data->heap_end, TRUE,
                        sgen_workers_get_job_gray_queue (worker_data));
-       sgen_free_internal_dynamic (job_data, sizeof (ScanThreadDataJobData), INTERNAL_MEM_WORKER_JOB_DATA);
 }
 
+typedef struct {
+       SgenThreadPoolJob job;
+       FinalizeReadyEntry *list;
+} ScanFinalizerEntriesJob;
+
 static void
-job_scan_finalizer_entries (WorkerData *worker_data, void *job_data_untyped)
+job_scan_finalizer_entries (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
-       FinalizeReadyEntry *list = job_data_untyped;
+       WorkerData *worker_data = worker_data_untyped;
+       ScanFinalizerEntriesJob *job_data = (ScanFinalizerEntriesJob*)job;
        ScanCopyContext ctx = { NULL, current_object_ops.copy_or_mark_object, sgen_workers_get_job_gray_queue (worker_data) };
 
-       scan_finalizer_entries (list, ctx);
+       scan_finalizer_entries (job_data->list, ctx);
 }
 
 static void
-job_scan_major_mod_union_cardtable (WorkerData *worker_data, void *job_data_untyped)
+job_scan_major_mod_union_cardtable (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
+       WorkerData *worker_data = worker_data_untyped;
        g_assert (concurrent_collection_in_progress);
        major_collector.scan_card_table (TRUE, sgen_workers_get_job_gray_queue (worker_data));
 }
 
 static void
-job_scan_los_mod_union_cardtable (WorkerData *worker_data, void *job_data_untyped)
+job_scan_los_mod_union_cardtable (void *worker_data_untyped, SgenThreadPoolJob *job)
 {
+       WorkerData *worker_data = worker_data_untyped;
        g_assert (concurrent_collection_in_progress);
        sgen_los_scan_card_table (TRUE, sgen_workers_get_job_gray_queue (worker_data));
 }
@@ -2155,6 +2152,49 @@ init_gray_queue (void)
        sgen_gray_object_queue_init (&gray_queue, NULL);
 }
 
+static void
+enqueue_scan_from_roots_jobs (char *heap_start, char *heap_end)
+{
+       ScanFromRegisteredRootsJob *scrrj;
+       ScanThreadDataJob *stdj;
+       ScanFinalizerEntriesJob *sfej;
+
+       /* registered roots, this includes static fields */
+
+       scrrj = (ScanFromRegisteredRootsJob*)sgen_thread_pool_job_alloc ("scan from registered roots normal", job_scan_from_registered_roots, sizeof (ScanFromRegisteredRootsJob));
+       scrrj->copy_or_mark_func = current_object_ops.copy_or_mark_object;
+       scrrj->scan_func = current_object_ops.scan_object;
+       scrrj->heap_start = heap_start;
+       scrrj->heap_end = heap_end;
+       scrrj->root_type = ROOT_TYPE_NORMAL;
+       sgen_workers_enqueue_job (&scrrj->job);
+
+       scrrj = (ScanFromRegisteredRootsJob*)sgen_thread_pool_job_alloc ("scan from registered roots wbarrier", job_scan_from_registered_roots, sizeof (ScanFromRegisteredRootsJob));
+       scrrj->copy_or_mark_func = current_object_ops.copy_or_mark_object;
+       scrrj->scan_func = current_object_ops.scan_object;
+       scrrj->heap_start = heap_start;
+       scrrj->heap_end = heap_end;
+       scrrj->root_type = ROOT_TYPE_WBARRIER;
+       sgen_workers_enqueue_job (&scrrj->job);
+
+       /* Threads */
+
+       stdj = (ScanThreadDataJob*)sgen_thread_pool_job_alloc ("scan thread data", job_scan_thread_data, sizeof (ScanThreadDataJob));
+       stdj->heap_start = heap_start;
+       stdj->heap_end = heap_end;
+       sgen_workers_enqueue_job (&stdj->job);
+
+       /* Scan the list of objects ready for finalization. */
+
+       sfej = (ScanFinalizerEntriesJob*)sgen_thread_pool_job_alloc ("scan finalizer entries", job_scan_finalizer_entries, sizeof (ScanFinalizerEntriesJob));
+       sfej->list = fin_ready_list;
+       sgen_workers_enqueue_job (&sfej->job);
+
+       sfej = (ScanFinalizerEntriesJob*)sgen_thread_pool_job_alloc ("scan critical finalizer entries", job_scan_finalizer_entries, sizeof (ScanFinalizerEntriesJob));
+       sfej->list = critical_fin_list;
+       sgen_workers_enqueue_job (&sfej->job);
+}
+
 /*
  * Perform a nursery collection.
  *
@@ -2166,8 +2206,6 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        gboolean needs_major;
        size_t max_garbage_amount;
        char *nursery_next;
-       ScanFromRegisteredRootsJobData *scrrjd_normal, *scrrjd_wbarrier;
-       ScanThreadDataJobData *stdjd;
        mword fragment_total;
        ScanCopyContext ctx;
        TV_DECLARE (atv);
@@ -2191,6 +2229,8 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
        current_collection_generation = GENERATION_NURSERY;
        current_object_ops = sgen_minor_collector.serial_ops;
 
+       SGEN_ASSERT (0, !sgen_collection_is_concurrent (), "Why is the nursery collection concurrent?");
+
        reset_pinned_from_failed_allocation ();
 
        check_scan_starts ();
@@ -2267,7 +2307,7 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
         * as part of which we scan the card table.  Then, later, we scan the mod union
         * cardtable.  We should only have to do one.
         */
-       sgen_workers_enqueue_job ("scan remset", job_remembered_set_scan, NULL);
+       sgen_workers_enqueue_job (sgen_thread_pool_job_alloc ("scan remset", job_remembered_set_scan, sizeof (SgenThreadPoolJob)));
 
        /* we don't have complete write barrier yet, so we scan all the old generation sections */
        TV_GETTIME (btv);
@@ -2291,46 +2331,13 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
 
        MONO_GC_CHECKPOINT_5 (GENERATION_NURSERY);
 
-       /* registered roots, this includes static fields */
-       scrrjd_normal = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       scrrjd_normal->copy_or_mark_func = current_object_ops.copy_or_mark_object;
-       scrrjd_normal->scan_func = current_object_ops.scan_object;
-       scrrjd_normal->heap_start = sgen_get_nursery_start ();
-       scrrjd_normal->heap_end = nursery_next;
-       scrrjd_normal->root_type = ROOT_TYPE_NORMAL;
-       sgen_workers_enqueue_job ("scan from registered roots normal", job_scan_from_registered_roots, scrrjd_normal);
-
-       scrrjd_wbarrier = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       scrrjd_wbarrier->copy_or_mark_func = current_object_ops.copy_or_mark_object;
-       scrrjd_wbarrier->scan_func = current_object_ops.scan_object;
-       scrrjd_wbarrier->heap_start = sgen_get_nursery_start ();
-       scrrjd_wbarrier->heap_end = nursery_next;
-       scrrjd_wbarrier->root_type = ROOT_TYPE_WBARRIER;
-       sgen_workers_enqueue_job ("scan from registered roots wbarrier", job_scan_from_registered_roots, scrrjd_wbarrier);
+       enqueue_scan_from_roots_jobs (sgen_get_nursery_start (), nursery_next);
 
        TV_GETTIME (btv);
-       time_minor_scan_registered_roots += TV_ELAPSED (atv, btv);
+       time_minor_scan_roots += TV_ELAPSED (atv, btv);
 
        MONO_GC_CHECKPOINT_6 (GENERATION_NURSERY);
-
-       /* thread data */
-       stdjd = sgen_alloc_internal_dynamic (sizeof (ScanThreadDataJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       stdjd->heap_start = sgen_get_nursery_start ();
-       stdjd->heap_end = nursery_next;
-       sgen_workers_enqueue_job ("scan thread data", job_scan_thread_data, stdjd);
-
-       TV_GETTIME (atv);
-       time_minor_scan_thread_data += TV_ELAPSED (btv, atv);
-       btv = atv;
-
        MONO_GC_CHECKPOINT_7 (GENERATION_NURSERY);
-
-       g_assert (!sgen_collection_is_concurrent ());
-
-       /* Scan the list of objects ready for finalization. If */
-       sgen_workers_enqueue_job ("scan finalizer entries", job_scan_finalizer_entries, fin_ready_list);
-       sgen_workers_enqueue_job ("scan criticial finalizer entries", job_scan_finalizer_entries, critical_fin_list);
-
        MONO_GC_CHECKPOINT_8 (GENERATION_NURSERY);
 
        finish_gray_stack (GENERATION_NURSERY, &gray_queue);
@@ -2340,14 +2347,6 @@ collect_nursery (SgenGrayQueue *unpin_queue, gboolean finish_up_concurrent_mark)
 
        MONO_GC_CHECKPOINT_9 (GENERATION_NURSERY);
 
-       /*
-        * The (single-threaded) finalization code might have done
-        * some copying/marking so we can only reset the GC thread's
-        * worker data here instead of earlier when we joined the
-        * workers.
-        */
-       sgen_workers_reset_data ();
-
        if (objects_pinned) {
                sgen_optimize_pin_queue ();
                sgen_pinning_setup_section (nursery_section);
@@ -2445,8 +2444,6 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
        char *heap_end = (char*)-1;
        gboolean profile_roots = mono_profiler_get_events () & MONO_PROFILE_GC_ROOTS;
        GCRootReport root_report = { 0 };
-       ScanFromRegisteredRootsJobData *scrrjd_normal, *scrrjd_wbarrier;
-       ScanThreadDataJobData *stdjd;
        ScanCopyContext ctx;
 
        if (concurrent_collection_in_progress) {
@@ -2630,59 +2627,24 @@ major_copy_or_mark_from_roots (size_t *old_next_pin_slot, gboolean start_concurr
        TV_GETTIME (atv);
        time_major_scan_pinned += TV_ELAPSED (btv, atv);
 
-       /* registered roots, this includes static fields */
-       scrrjd_normal = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       scrrjd_normal->copy_or_mark_func = current_object_ops.copy_or_mark_object;
-       scrrjd_normal->scan_func = current_object_ops.scan_object;
-       scrrjd_normal->heap_start = heap_start;
-       scrrjd_normal->heap_end = heap_end;
-       scrrjd_normal->root_type = ROOT_TYPE_NORMAL;
-       sgen_workers_enqueue_job ("scan from registered roots normal", job_scan_from_registered_roots, scrrjd_normal);
-
-       scrrjd_wbarrier = sgen_alloc_internal_dynamic (sizeof (ScanFromRegisteredRootsJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       scrrjd_wbarrier->copy_or_mark_func = current_object_ops.copy_or_mark_object;
-       scrrjd_wbarrier->scan_func = current_object_ops.scan_object;
-       scrrjd_wbarrier->heap_start = heap_start;
-       scrrjd_wbarrier->heap_end = heap_end;
-       scrrjd_wbarrier->root_type = ROOT_TYPE_WBARRIER;
-       sgen_workers_enqueue_job ("scan from registered roots wbarrier", job_scan_from_registered_roots, scrrjd_wbarrier);
-
-       TV_GETTIME (btv);
-       time_major_scan_registered_roots += TV_ELAPSED (atv, btv);
-
-       /* Threads */
-       stdjd = sgen_alloc_internal_dynamic (sizeof (ScanThreadDataJobData), INTERNAL_MEM_WORKER_JOB_DATA, TRUE);
-       stdjd->heap_start = heap_start;
-       stdjd->heap_end = heap_end;
-       sgen_workers_enqueue_job ("scan thread data", job_scan_thread_data, stdjd);
-
-       TV_GETTIME (atv);
-       time_major_scan_thread_data += TV_ELAPSED (btv, atv);
-
-       TV_GETTIME (btv);
-       time_major_scan_alloc_pinned += TV_ELAPSED (atv, btv);
-
        if (mono_profiler_get_events () & MONO_PROFILE_GC_ROOTS)
                report_finalizer_roots ();
 
-       /* scan the list of objects ready for finalization */
-       sgen_workers_enqueue_job ("scan finalizer entries", job_scan_finalizer_entries, fin_ready_list);
-       sgen_workers_enqueue_job ("scan critical finalizer entries", job_scan_finalizer_entries, critical_fin_list);
+       enqueue_scan_from_roots_jobs (heap_start, heap_end);
+
+       TV_GETTIME (btv);
+       time_major_scan_roots += TV_ELAPSED (atv, btv);
 
        if (scan_mod_union) {
                g_assert (finish_up_concurrent_mark);
 
                /* Mod union card table */
-               sgen_workers_enqueue_job ("scan mod union cardtable", job_scan_major_mod_union_cardtable, NULL);
-               sgen_workers_enqueue_job ("scan LOS mod union cardtable", job_scan_los_mod_union_cardtable, NULL);
-       }
-
-       TV_GETTIME (atv);
-       time_major_scan_finalized += TV_ELAPSED (btv, atv);
-       SGEN_LOG (2, "Root scan: %d usecs", TV_ELAPSED (btv, atv));
+               sgen_workers_enqueue_job (sgen_thread_pool_job_alloc ("scan mod union cardtable", job_scan_major_mod_union_cardtable, sizeof (SgenThreadPoolJob)));
+               sgen_workers_enqueue_job (sgen_thread_pool_job_alloc ("scan LOS mod union cardtable", job_scan_los_mod_union_cardtable, sizeof (SgenThreadPoolJob)));
 
-       TV_GETTIME (btv);
-       time_major_scan_big_objects += TV_ELAPSED (atv, btv);
+               TV_GETTIME (atv);
+               time_major_scan_mod_union += TV_ELAPSED (btv, atv);
+       }
 }
 
 static void
@@ -2749,14 +2711,7 @@ major_start_collection (gboolean concurrent, size_t *old_next_pin_slot)
 }
 
 static void
-wait_for_workers_to_finish (void)
-{
-       while (!sgen_workers_all_done ())
-               g_usleep (200);
-}
-
-static void
-major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean scan_whole_nursery)
+major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean forced, gboolean scan_whole_nursery)
 {
        ScannedObjectCounts counts;
        LOSObject *bigobj, *prevbo;
@@ -2807,14 +2762,6 @@ major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean
 
        SGEN_ASSERT (0, sgen_workers_all_done (), "Can't have workers working after joining");
 
-       /*
-        * The (single-threaded) finalization code might have done
-        * some copying/marking so we can only reset the GC thread's
-        * worker data here instead of earlier when we joined the
-        * workers.
-        */
-       sgen_workers_reset_data ();
-
        if (objects_pinned) {
                g_assert (!concurrent_collection_in_progress);
 
@@ -2916,7 +2863,7 @@ major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean
 
        g_assert (sgen_gray_object_queue_is_empty (&gray_queue));
 
-       sgen_memgov_major_collection_end ();
+       sgen_memgov_major_collection_end (forced);
        current_collection_generation = -1;
 
        memset (&counts, 0, sizeof (ScannedObjectCounts));
@@ -2939,7 +2886,7 @@ major_finish_collection (const char *reason, size_t old_next_pin_slot, gboolean
 }
 
 static gboolean
-major_do_collection (const char *reason)
+major_do_collection (const char *reason, gboolean forced)
 {
        TV_DECLARE (time_start);
        TV_DECLARE (time_end);
@@ -2957,7 +2904,7 @@ major_do_collection (const char *reason)
        TV_GETTIME (time_start);
 
        major_start_collection (FALSE, &old_next_pin_slot);
-       major_finish_collection (reason, old_next_pin_slot, FALSE);
+       major_finish_collection (reason, old_next_pin_slot, forced, FALSE);
 
        TV_GETTIME (time_end);
        gc_stats.major_gc_time += TV_ELAPSED (time_start, time_end);
@@ -3033,7 +2980,7 @@ major_update_concurrent_collection (void)
 }
 
 static void
-major_finish_concurrent_collection (void)
+major_finish_concurrent_collection (gboolean forced)
 {
        TV_DECLARE (total_start);
        TV_DECLARE (total_end);
@@ -3052,7 +2999,7 @@ major_finish_concurrent_collection (void)
         * marking before the nursery collection is allowed to run, otherwise we might miss
         * some remsets.
         */
-       wait_for_workers_to_finish ();
+       sgen_workers_wait ();
 
        SGEN_TV_GETTIME (time_major_conc_collection_end);
        gc_stats.major_gc_time_concurrent += SGEN_TV_ELAPSED (time_major_conc_collection_start, time_major_conc_collection_end);
@@ -3066,7 +3013,7 @@ major_finish_concurrent_collection (void)
                sgen_check_mod_union_consistency ();
 
        current_collection_generation = GENERATION_OLD;
-       major_finish_collection ("finishing", -1, late_pinned);
+       major_finish_collection ("finishing", -1, forced, late_pinned);
 
        if (whole_heap_check_before_collection)
                sgen_check_whole_heap (FALSE);
@@ -3093,7 +3040,6 @@ sgen_ensure_free_space (size_t size)
        int generation_to_collect = -1;
        const char *reason = NULL;
 
-
        if (size > SGEN_MAX_SMALL_OBJ_SIZE) {
                if (sgen_need_major_collection (size)) {
                        reason = "LOS overflow";
@@ -3163,7 +3109,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                gboolean finish = major_should_finish_concurrent_collection () || (wait_to_finish && generation_to_collect == GENERATION_OLD);
 
                if (finish) {
-                       major_finish_concurrent_collection ();
+                       major_finish_concurrent_collection (wait_to_finish);
                        oldest_generation_collected = GENERATION_OLD;
                } else {
                        sgen_workers_signal_start_nursery_collection_and_wait ();
@@ -3210,7 +3156,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                        goto done;
                }
 
-               if (major_do_collection (reason)) {
+               if (major_do_collection (reason, wait_to_finish)) {
                        overflow_generation_to_collect = GENERATION_NURSERY;
                        overflow_reason = "Excessive pinning";
                }
@@ -3242,7 +3188,7 @@ sgen_perform_collection (size_t requested_size, int generation_to_collect, const
                if (overflow_generation_to_collect == GENERATION_NURSERY)
                        collect_nursery (NULL, FALSE);
                else
-                       major_do_collection (overflow_reason);
+                       major_do_collection (overflow_reason, wait_to_finish);
 
                TV_GETTIME (gc_end);
                infos [1].total_time = SGEN_TV_ELAPSED (infos [1].total_time, gc_end);
@@ -4855,9 +4801,6 @@ mono_gc_base_init (void)
                g_strfreev (opts);
        }
 
-       if (major_collector.is_concurrent)
-               sgen_workers_init (1);
-
        if (major_collector_opt)
                g_free (major_collector_opt);
 
@@ -5029,6 +4972,9 @@ mono_gc_base_init (void)
        if (major_collector.post_param_init)
                major_collector.post_param_init (&major_collector);
 
+       if (major_collector.needs_thread_pool)
+               sgen_workers_init (1);
+
        sgen_memgov_init (max_heap, soft_limit, debug_print_allowance, allowance_ratio, save_target);
 
        memset (&remset, 0, sizeof (remset));
index f9c09677b86015038ac5b0bc63dd6cc3dc54a712..c157e5257e8fefd4cfbf2fce943754ea55e6c4f4 100644 (file)
@@ -169,6 +169,7 @@ extern LOCK_DECLARE (sgen_interruption_mutex);
 #define UNLOCK_INTERRUPTION mono_mutex_unlock (&sgen_interruption_mutex)
 
 /* FIXME: Use InterlockedAdd & InterlockedAdd64 to reduce the CAS cost. */
+#define SGEN_CAS       InterlockedCompareExchange
 #define SGEN_CAS_PTR   InterlockedCompareExchangePointer
 #define SGEN_ATOMIC_ADD(x,i)   do {                                    \
                int __old_x;                                            \
@@ -391,8 +392,6 @@ gboolean sgen_resume_thread (SgenThreadInfo *info);
 void sgen_wait_for_suspend_ack (int count);
 void sgen_os_init (void);
 
-gboolean sgen_is_worker_thread (MonoNativeThreadId thread);
-
 void sgen_update_heap_boundaries (mword low, mword high);
 
 void sgen_scan_area_with_callback (char *start, char *end, IterateObjectCallbackFunc callback, void *data, gboolean allow_flags);
@@ -420,7 +419,7 @@ enum {
        INTERNAL_MEM_MS_BLOCK_INFO_SORT,
        INTERNAL_MEM_EPHEMERON_LINK,
        INTERNAL_MEM_WORKER_DATA,
-       INTERNAL_MEM_WORKER_JOB_DATA,
+       INTERNAL_MEM_THREAD_POOL_JOB,
        INTERNAL_MEM_BRIDGE_DATA,
        INTERNAL_MEM_OLD_BRIDGE_HASH_TABLE,
        INTERNAL_MEM_OLD_BRIDGE_HASH_TABLE_ENTRY,
@@ -432,7 +431,6 @@ enum {
        INTERNAL_MEM_TARJAN_BRIDGE_HASH_TABLE_ENTRY,
        INTERNAL_MEM_TARJAN_OBJ_BUCKET,
        INTERNAL_MEM_BRIDGE_DEBUG,
-       INTERNAL_MEM_JOB_QUEUE_ENTRY,
        INTERNAL_MEM_TOGGLEREF_DATA,
        INTERNAL_MEM_CARDTABLE_MOD_UNION,
        INTERNAL_MEM_BINARY_PROTOCOL,
@@ -612,12 +610,13 @@ void sgen_split_nursery_init (SgenMinorCollector *collector);
 /* Updating references */
 
 #ifdef SGEN_CHECK_UPDATE_REFERENCE
+gboolean sgen_thread_pool_is_thread_pool_thread (MonoNativeThreadId some_thread) MONO_INTERNAL;
 static inline void
 sgen_update_reference (void **p, void *o, gboolean allow_null)
 {
        if (!allow_null)
                SGEN_ASSERT (0, o, "Cannot update a reference with a NULL pointer");
-       SGEN_ASSERT (0, !sgen_is_worker_thread (mono_native_thread_id_get ()), "Can't update a reference in the worker thread");
+       SGEN_ASSERT (0, !sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "Can't update a reference in the worker thread");
        *p = o;
 }
 
@@ -653,6 +652,7 @@ typedef struct _SgenMajorCollector SgenMajorCollector;
 struct _SgenMajorCollector {
        size_t section_size;
        gboolean is_concurrent;
+       gboolean needs_thread_pool;
        gboolean supports_cardtable;
        gboolean sweeps_lazily;
 
@@ -673,7 +673,13 @@ struct _SgenMajorCollector {
 
        void* (*alloc_object) (MonoVTable *vtable, size_t size, gboolean has_references);
        void (*free_pinned_object) (char *obj, size_t size);
+
+       /*
+        * This is used for domain unloading, heap walking from the logging profiler, and
+        * debugging.  Can assume the world is stopped.
+        */
        void (*iterate_objects) (IterateObjectsFlags flags, IterateObjectCallbackFunc callback, void *data);
+
        void (*free_non_pinned_object) (char *obj, size_t size);
        void (*pin_objects) (SgenGrayQueue *queue);
        void (*pin_major_object) (char *obj, SgenGrayQueue *queue);
@@ -682,8 +688,9 @@ struct _SgenMajorCollector {
        void (*update_cardtable_mod_union) (void);
        void (*init_to_space) (void);
        void (*sweep) (void);
-       gboolean (*have_finished_sweeping) (void);
-       void (*free_swept_blocks) (void);
+       gboolean (*have_swept) (void);
+       void (*finish_sweeping) (void);
+       void (*free_swept_blocks) (size_t allowance);
        void (*check_scan_starts) (void);
        void (*dump_heap) (FILE *heap_dump_file);
        gint64 (*get_used_size) (void);
@@ -696,13 +703,10 @@ struct _SgenMajorCollector {
        gboolean (*obj_is_from_pinned_alloc) (char *obj);
        void (*report_pinned_memory_usage) (void);
        size_t (*get_num_major_sections) (void);
+       size_t (*get_bytes_survived_last_sweep) (void);
        gboolean (*handle_gc_param) (const char *opt);
        void (*print_gc_param_usage) (void);
-       gboolean (*is_worker_thread) (MonoNativeThreadId thread);
        void (*post_param_init) (SgenMajorCollector *collector);
-       void* (*alloc_worker_data) (void);
-       void (*init_worker_thread) (void *data);
-       void (*reset_worker_data) (void *data);
        gboolean (*is_valid_object) (char *object);
        MonoVTable* (*describe_pointer) (char *pointer);
        guint8* (*get_cardtable_mod_union_for_object) (char *object);
@@ -959,6 +963,7 @@ typedef struct {
 
 int sgen_stop_world (int generation);
 int sgen_restart_world (int generation, GGTimingInfo *timing);
+gboolean sgen_is_world_stopped (void);
 void sgen_init_stw (void);
 
 /* LOS */
index f5b8995355fe58edeb341cb656fd8c0763ade96b..dc484cc2a10bbc85204b75ee62368466d03360bc 100644 (file)
@@ -134,7 +134,7 @@ description_for_type (int type)
        case INTERNAL_MEM_MS_BLOCK_INFO_SORT: return "marksweep-block-info-sort";
        case INTERNAL_MEM_EPHEMERON_LINK: return "ephemeron-link";
        case INTERNAL_MEM_WORKER_DATA: return "worker-data";
-       case INTERNAL_MEM_WORKER_JOB_DATA: return "worker-job-data";
+       case INTERNAL_MEM_THREAD_POOL_JOB: return "thread-pool-job";
        case INTERNAL_MEM_BRIDGE_DATA: return "bridge-data";
        case INTERNAL_MEM_OLD_BRIDGE_HASH_TABLE: return "old-bridge-hash-table";
        case INTERNAL_MEM_OLD_BRIDGE_HASH_TABLE_ENTRY: return "old-bridge-hash-table-entry";
@@ -146,7 +146,6 @@ description_for_type (int type)
        case INTERNAL_MEM_BRIDGE_ALIVE_HASH_TABLE: return "bridge-alive-hash-table";
        case INTERNAL_MEM_BRIDGE_ALIVE_HASH_TABLE_ENTRY: return "bridge-alive-hash-table-entry";
        case INTERNAL_MEM_BRIDGE_DEBUG: return "bridge-debug";
-       case INTERNAL_MEM_JOB_QUEUE_ENTRY: return "job-queue-entry";
        case INTERNAL_MEM_TOGGLEREF_DATA: return "toggleref-data";
        case INTERNAL_MEM_CARDTABLE_MOD_UNION: return "cardtable-mod-union";
        case INTERNAL_MEM_BINARY_PROTOCOL: return "binary-protocol";
index db91b6773c85c5f9f898137fdb4b7d9b9b23d6cd..90a9a84c8c0f4ab994b322894ee605dbd0200a58 100644 (file)
@@ -43,6 +43,7 @@
 #include "metadata/sgen-pointer-queue.h"
 #include "metadata/sgen-pinning.h"
 #include "metadata/sgen-workers.h"
+#include "metadata/sgen-thread-pool.h"
 
 #if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT)
 #define MS_BLOCK_SIZE  ARCH_MIN_MS_BLOCK_SIZE
 
 #define MS_NUM_MARK_WORDS      ((MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (mword) * 8 - 1) / (sizeof (mword) * 8))
 
+/*
+ * Blocks progress from one state to the next:
+ *
+ * SWEPT           The block is fully swept.  It might or might not be in
+ *                 a free list.
+ *
+ * MARKING         The block might or might not contain live objects.  If
+ *                 we're in between an initial collection pause and the
+ *                 finishing pause, the block might or might not be in a
+ *                 free list.
+ *
+ * CHECKING        The sweep thread is investigating the block to determine
+ *                 whether or not it contains live objects.  The block is
+ *                 not in a free list.
+ *
+ * NEED_SWEEPING   The block contains live objects but has not yet been
+ *                 swept.  It also contains free slots.  It is in a block
+ *                 free list.
+ *
+ * SWEEPING        The block is being swept.  It might be in a free list.
+ */
+
+enum {
+       BLOCK_STATE_SWEPT,
+       BLOCK_STATE_MARKING,
+       BLOCK_STATE_CHECKING,
+       BLOCK_STATE_NEED_SWEEPING,
+       BLOCK_STATE_SWEEPING
+};
+
 typedef struct _MSBlockInfo MSBlockInfo;
 struct _MSBlockInfo {
        guint16 obj_size;
@@ -79,11 +110,12 @@ struct _MSBlockInfo {
         * recalculating to save the space.
         */
        guint16 obj_size_index;
+       /* FIXME: Reduce this - it only needs a byte. */
+       volatile gint32 state;
        unsigned int pinned : 1;
        unsigned int has_references : 1;
        unsigned int has_pinned : 1;    /* means cannot evacuate */
        unsigned int is_to_space : 1;
-       unsigned int swept : 1;
        void ** volatile free_list;
        MSBlockInfo * volatile next_free;
        guint8 *cardtable_mod_union;
@@ -144,16 +176,30 @@ static float evacuation_threshold = 0.666f;
 static float concurrent_evacuation_threshold = 0.666f;
 static gboolean want_evacuation = FALSE;
 
-static gboolean lazy_sweep = TRUE;
-static gboolean have_swept = TRUE;
+static gboolean lazy_sweep = FALSE;
+
+enum {
+       SWEEP_STATE_SWEPT,
+       SWEEP_STATE_NEED_SWEEPING,
+       SWEEP_STATE_SWEEPING,
+       SWEEP_STATE_SWEEPING_AND_ITERATING,
+       SWEEP_STATE_COMPACTING
+};
+
+static volatile int sweep_state = SWEEP_STATE_SWEPT;
 
 static gboolean concurrent_mark;
+static gboolean concurrent_sweep = TRUE;
 
 #define BLOCK_IS_TAGGED_HAS_REFERENCES(bl)     SGEN_POINTER_IS_TAGGED_1 ((bl))
 #define BLOCK_TAG_HAS_REFERENCES(bl)           SGEN_POINTER_TAG_1 ((bl))
-#define BLOCK_UNTAG_HAS_REFERENCES(bl)         SGEN_POINTER_UNTAG_1 ((bl))
 
-#define BLOCK_TAG(bl)  ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
+#define BLOCK_IS_TAGGED_CHECKING(bl)           SGEN_POINTER_IS_TAGGED_2 ((bl))
+#define BLOCK_TAG_CHECKING(bl)                 SGEN_POINTER_TAG_2 ((bl))
+
+#define BLOCK_UNTAG(bl)                                SGEN_POINTER_UNTAG_12 ((bl))
+
+#define BLOCK_TAG(bl)                          ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
 
 /* all allocated blocks in the system */
 static SgenPointerQueue allocated_blocks;
@@ -162,14 +208,42 @@ static SgenPointerQueue allocated_blocks;
 static void *empty_blocks = NULL;
 static size_t num_empty_blocks = 0;
 
-#define FOREACH_BLOCK(bl)      { size_t __index; for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [__index]);
-#define FOREACH_BLOCK_HAS_REFERENCES(bl,hr)    { size_t __index; for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { (bl) = allocated_blocks.data [__index]; (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); (bl) = BLOCK_UNTAG_HAS_REFERENCES ((bl));
-#define END_FOREACH_BLOCK      } }
-#define DELETE_BLOCK_IN_FOREACH()      (allocated_blocks.data [__index] = NULL)
-
-static size_t num_major_sections = 0;
-/* one free block list for each block object size */
-static MSBlockInfo **free_block_lists [MS_BLOCK_TYPE_MAX];
+#define FOREACH_BLOCK_NO_LOCK_CONDITION(cond,bl) {                     \
+       size_t __index;                                                 \
+       SGEN_ASSERT (0, (cond) && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); \
+       for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { \
+               (bl) = BLOCK_UNTAG (allocated_blocks.data [__index]);
+#define FOREACH_BLOCK_NO_LOCK(bl)                                      \
+       FOREACH_BLOCK_NO_LOCK_CONDITION(sgen_is_world_stopped (), bl)
+#define FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK(bl,hr) {                  \
+       size_t __index;                                                 \
+       SGEN_ASSERT (0, sgen_is_world_stopped () && !sweep_in_progress (), "Can't iterate blocks while the world is running or sweep is in progress."); \
+       for (__index = 0; __index < allocated_blocks.next_slot; ++__index) { \
+               (bl) = allocated_blocks.data [__index];                 \
+               (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl));           \
+               (bl) = BLOCK_UNTAG ((bl));
+#define END_FOREACH_BLOCK_NO_LOCK      } }
+
+static volatile size_t num_major_sections = 0;
+/*
+ * One free block list for each block object size.  We add and remove blocks from these
+ * lists lock-free via CAS.
+ *
+ * Blocks accessed/removed from `free_block_lists`:
+ *   from the mutator (with GC lock held)
+ *   in nursery collections
+ *   in non-concurrent major collections
+ *   in the finishing pause of concurrent major collections (whole list is cleared)
+ *
+ * Blocks added to `free_block_lists`:
+ *   in the sweeping thread
+ *   during nursery collections
+ *   from domain clearing (with the world stopped and no sweeping happening)
+ *
+ * The only item of those that doesn't require the GC lock is the sweep thread.  The sweep
+ * thread only ever adds blocks to the free list, so the ABA problem can't occur.
+ */
+static MSBlockInfo * volatile *free_block_lists [MS_BLOCK_TYPE_MAX];
 
 static guint64 stat_major_blocks_alloced = 0;
 static guint64 stat_major_blocks_freed = 0;
@@ -206,8 +280,7 @@ add_scanned_object (void *ptr)
 }
 #endif
 
-static void
-sweep_block (MSBlockInfo *block, gboolean during_major_collection);
+static gboolean sweep_block (MSBlockInfo *block);
 
 static int
 ms_find_block_obj_size_index (size_t size)
@@ -246,6 +319,9 @@ update_heap_boundaries_for_block (MSBlockInfo *block)
        sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE);
 }
 
+/*
+ * Thread safe
+ */
 static void*
 ms_get_empty_block (void)
 {
@@ -327,6 +403,24 @@ ms_free_block (void *block)
        } while (SGEN_CAS_PTR (&empty_blocks, block, empty) != empty);
 
        SGEN_ATOMIC_ADD_P (num_empty_blocks, 1);
+
+       binary_protocol_block_free (block, MS_BLOCK_SIZE);
+}
+
+static gboolean
+sweep_in_progress (void)
+{
+       int state = sweep_state;
+       return state == SWEEP_STATE_SWEEPING ||
+               state == SWEEP_STATE_SWEEPING_AND_ITERATING ||
+               state == SWEEP_STATE_COMPACTING;
+}
+
+static inline gboolean
+block_is_swept_or_marking (MSBlockInfo *block)
+{
+       gint32 state = block->state;
+       return state == BLOCK_STATE_SWEPT || state == BLOCK_STATE_MARKING;
 }
 
 //#define MARKSWEEP_CONSISTENCY_CHECK
@@ -335,16 +429,15 @@ ms_free_block (void *block)
 static void
 check_block_free_list (MSBlockInfo *block, int size, gboolean pinned)
 {
-       MSBlockInfo *b;
-
+       SGEN_ASSERT (0, !sweep_in_progress (), "Can't examine allocated blocks during sweep");
        for (; block; block = block->next_free) {
+               SGEN_ASSERT (0, block->state != BLOCK_STATE_CHECKING, "Can't have a block we're checking in a free list.");
                g_assert (block->obj_size == size);
                g_assert ((pinned && block->pinned) || (!pinned && !block->pinned));
 
                /* blocks in the free lists must have at least
                   one free slot */
-               if (block->swept)
-                       g_assert (block->free_list);
+               g_assert (block->free_list);
 
                /* the block must be in the allocated_blocks array */
                g_assert (sgen_pointer_queue_find (&allocated_blocks, BLOCK_TAG (block)) != (size_t)-1);
@@ -368,14 +461,11 @@ consistency_check (void)
        int i;
 
        /* check all blocks */
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int num_free = 0;
                void **free;
 
-               /* check block header */
-               g_assert (((MSBlockHeader*)block->block)->info == block);
-
                /* count number of free slots */
                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
@@ -391,11 +481,11 @@ consistency_check (void)
                g_assert (num_free == 0);
 
                /* check all mark words are zero */
-               if (block->swept) {
+               if (!sgen_concurrent_collection_in_progress () && block_is_swept_or_marking (block)) {
                        for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
                                g_assert (block->mark_words [i] == 0);
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        /* check free blocks */
        for (i = 0; i < num_block_obj_sizes; ++i) {
@@ -408,13 +498,24 @@ consistency_check (void)
 }
 #endif
 
+static void
+add_free_block (MSBlockInfo * volatile *free_blocks, int size_index, MSBlockInfo *block)
+{
+       MSBlockInfo *old;
+       do {
+               block->next_free = old = free_blocks [size_index];
+       } while (SGEN_CAS_PTR ((gpointer)&free_blocks [size_index], block, old) != old);
+}
+
+static void major_finish_sweep_checking (void);
+
 static gboolean
 ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
 {
        int size = block_obj_sizes [size_index];
        int count = MS_BLOCK_FREE / size;
        MSBlockInfo *info;
-       MSBlockInfo **free_blocks = FREE_BLOCKS (pinned, has_references);
+       MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (pinned, has_references);
        char *obj_start;
        int i;
 
@@ -437,11 +538,14 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
         * want further evacuation.
         */
        info->is_to_space = (sgen_get_current_collection_generation () == GENERATION_OLD);
-       info->swept = 1;
+       info->state = (info->is_to_space || sgen_concurrent_collection_in_progress ()) ? BLOCK_STATE_MARKING : BLOCK_STATE_SWEPT;
+       SGEN_ASSERT (6, !sweep_in_progress () || info->state == BLOCK_STATE_SWEPT, "How do we add a new block to be swept while sweeping?");
        info->cardtable_mod_union = NULL;
 
        update_heap_boundaries_for_block (info);
 
+       binary_protocol_block_alloc (info, MS_BLOCK_SIZE);
+
        /* build free list */
        obj_start = MS_BLOCK_FOR_BLOCK_INFO (info) + MS_BLOCK_SKIP;
        info->free_list = (void**)obj_start;
@@ -454,12 +558,19 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references)
        /* the last one */
        *(void**)obj_start = NULL;
 
-       info->next_free = free_blocks [size_index];
-       free_blocks [size_index] = info;
+       add_free_block (free_blocks, size_index, info);
+
+       /*
+        * This is the only place where the `allocated_blocks` array can potentially grow.
+        * We need to make sure concurrent sweep isn't running when that happens, so in that
+        * specific case we just wait for sweep to finish.
+        */
+       if (sgen_pointer_queue_will_grow (&allocated_blocks))
+               major_finish_sweep_checking ();
 
        sgen_pointer_queue_add (&allocated_blocks, BLOCK_TAG (info));
 
-       ++num_major_sections;
+       SGEN_ATOMIC_ADD_P (num_major_sections, 1);
        return TRUE;
 }
 
@@ -468,36 +579,68 @@ obj_is_from_pinned_alloc (char *ptr)
 {
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE)
                        return block->pinned;
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
        return FALSE;
 }
 
+static void
+ensure_can_access_block_free_list (MSBlockInfo *block)
+{
+ retry:
+       for (;;) {
+               switch (block->state) {
+               case BLOCK_STATE_SWEPT:
+               case BLOCK_STATE_MARKING:
+                       return;
+               case BLOCK_STATE_CHECKING:
+                       SGEN_ASSERT (0, FALSE, "How did we get a block that's being checked from a free list?");
+                       break;
+               case BLOCK_STATE_NEED_SWEEPING:
+                       if (sweep_block (block))
+                               ++stat_major_blocks_lazy_swept;
+                       break;
+               case BLOCK_STATE_SWEEPING:
+                       /* FIXME: do this more elegantly */
+                       g_usleep (100);
+                       goto retry;
+               default:
+                       SGEN_ASSERT (0, FALSE, "Illegal block state");
+                       break;
+               }
+       }
+}
+
 static void*
-unlink_slot_from_free_list_uncontested (MSBlockInfo **free_blocks, int size_index)
+unlink_slot_from_free_list_uncontested (MSBlockInfo * volatile *free_blocks, int size_index)
 {
-       MSBlockInfo *block;
-       void *obj;
+       MSBlockInfo *block, *next_free_block;
+       void *obj, *next_free_slot;
 
+ retry:
        block = free_blocks [size_index];
        SGEN_ASSERT (9, block, "no free block to unlink from free_blocks %p size_index %d", free_blocks, size_index);
 
-       if (G_UNLIKELY (!block->swept)) {
-               stat_major_blocks_lazy_swept ++;
-               sweep_block (block, FALSE);
-       }
+       ensure_can_access_block_free_list (block);
 
        obj = block->free_list;
-       SGEN_ASSERT (9, obj, "block %p in free list had no available object to alloc from", block);
+       SGEN_ASSERT (6, obj, "block %p in free list had no available object to alloc from", block);
 
-       block->free_list = *(void**)obj;
-       if (!block->free_list) {
-               free_blocks [size_index] = block->next_free;
-               block->next_free = NULL;
+       next_free_slot = *(void**)obj;
+       if (next_free_slot) {
+               block->free_list = next_free_slot;
+               return obj;
        }
 
+       next_free_block = block->next_free;
+       if (SGEN_CAS_PTR ((gpointer)&free_blocks [size_index], next_free_block, block) != block)
+               goto retry;
+
+       block->free_list = NULL;
+       block->next_free = NULL;
+
        return obj;
 }
 
@@ -505,7 +648,7 @@ static void*
 alloc_obj (MonoVTable *vtable, size_t size, gboolean pinned, gboolean has_references)
 {
        int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
-       MSBlockInfo **free_blocks = FREE_BLOCKS (pinned, has_references);
+       MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (pinned, has_references);
        void *obj;
 
        if (!free_blocks [size_index]) {
@@ -538,23 +681,28 @@ free_object (char *obj, size_t size, gboolean pinned)
 {
        MSBlockInfo *block = MS_BLOCK_FOR_OBJ (obj);
        int word, bit;
+       gboolean in_free_list;
+
+       SGEN_ASSERT (9, sweep_state == SWEEP_STATE_SWEPT, "Should have waited for sweep to free objects.");
 
-       if (!block->swept)
-               sweep_block (block, FALSE);
+       ensure_can_access_block_free_list (block);
        SGEN_ASSERT (9, (pinned && block->pinned) || (!pinned && !block->pinned), "free-object pinning mixup object %p pinned %d block %p pinned %d", obj, pinned, block, block->pinned);
        SGEN_ASSERT (9, MS_OBJ_ALLOCED (obj, block), "object %p is already free", obj);
        MS_CALC_MARK_BIT (word, bit, obj);
        SGEN_ASSERT (9, !MS_MARK_BIT (block, word, bit), "object %p has mark bit set");
-       if (!block->free_list) {
-               MSBlockInfo **free_blocks = FREE_BLOCKS (pinned, block->has_references);
-               int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
-               SGEN_ASSERT (9, !block->next_free, "block %p doesn't have a free-list of object but belongs to a free-list of blocks");
-               block->next_free = free_blocks [size_index];
-               free_blocks [size_index] = block;
-       }
+
        memset (obj, 0, size);
+
+       in_free_list = !!block->free_list;
        *(void**)obj = block->free_list;
        block->free_list = (void**)obj;
+
+       if (!in_free_list) {
+               MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (pinned, block->has_references);
+               int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
+               SGEN_ASSERT (9, !block->next_free, "block %p doesn't have a free-list of object but belongs to a free-list of blocks");
+               add_free_block (free_blocks, size_index, block);
+       }
 }
 
 static void
@@ -592,17 +740,10 @@ free_pinned_object (char *obj, size_t size)
 static void*
 major_alloc_degraded (MonoVTable *vtable, size_t size)
 {
-       void *obj;
-       size_t old_num_sections;
-
-       old_num_sections = num_major_sections;
-
-       obj = alloc_obj (vtable, size, FALSE, SGEN_VTABLE_HAS_REFERENCES (vtable));
+       void *obj = alloc_obj (vtable, size, FALSE, SGEN_VTABLE_HAS_REFERENCES (vtable));
        if (G_LIKELY (obj)) {
                HEAVY_STAT (++stat_objects_alloced_degraded);
                HEAVY_STAT (stat_bytes_alloced_degraded += size);
-               g_assert (num_major_sections >= old_num_sections);
-               sgen_register_major_sections_alloced (num_major_sections - old_num_sections);
        }
        return obj;
 }
@@ -640,7 +781,7 @@ major_ptr_is_in_non_pinned_space (char *ptr, char **start)
 {
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) {
                        int count = MS_BLOCK_FREE / block->obj_size;
                        int i;
@@ -654,10 +795,71 @@ major_ptr_is_in_non_pinned_space (char *ptr, char **start)
                        }
                        return !block->pinned;
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
        return FALSE;
 }
 
+static gboolean
+try_set_sweep_state (int new, int expected)
+{
+       int old = SGEN_CAS (&sweep_state, new, expected);
+       return old == expected;
+}
+
+static void
+set_sweep_state (int new, int expected)
+{
+       gboolean success = try_set_sweep_state (new, expected);
+       SGEN_ASSERT (0, success, "Could not set sweep state.");
+}
+
+static gboolean ensure_block_is_checked_for_sweeping (int block_index, gboolean wait, gboolean *have_checked);
+
+static SgenThreadPoolJob * volatile sweep_job;
+
+static void
+major_finish_sweep_checking (void)
+{
+       int block_index;
+       SgenThreadPoolJob *job;
+
+ retry:
+       switch (sweep_state) {
+       case SWEEP_STATE_SWEPT:
+       case SWEEP_STATE_NEED_SWEEPING:
+               return;
+       case SWEEP_STATE_SWEEPING:
+               if (try_set_sweep_state (SWEEP_STATE_SWEEPING_AND_ITERATING, SWEEP_STATE_SWEEPING))
+                       break;
+               goto retry;
+       case SWEEP_STATE_SWEEPING_AND_ITERATING:
+               SGEN_ASSERT (0, FALSE, "Is there another minor collection running?");
+               goto retry;
+       case SWEEP_STATE_COMPACTING:
+               goto wait;
+       default:
+               SGEN_ASSERT (0, FALSE, "Invalid sweep state.");
+               break;
+       }
+
+       /*
+        * We're running with the world stopped and the only other thread doing work is the
+        * sweep thread, which doesn't add blocks to the array, so we can safely access
+        * `next_slot`.
+        */
+       for (block_index = 0; block_index < allocated_blocks.next_slot; ++block_index)
+               ensure_block_is_checked_for_sweeping (block_index, FALSE, NULL);
+
+       set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_SWEEPING_AND_ITERATING);
+
+ wait:
+       job = sweep_job;
+       if (job)
+               sgen_thread_pool_job_wait (job);
+       SGEN_ASSERT (0, !sweep_job, "Why did the sweep job not null itself?");
+       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "How is the sweep job done but we're not swept?");
+}
+
 static void
 major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc callback, void *data)
 {
@@ -666,7 +868,8 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
        gboolean pinned = flags & ITERATE_OBJECTS_PINNED;
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       major_finish_sweep_checking ();
+       FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
 
@@ -675,14 +878,21 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
                if (!block->pinned && !non_pinned)
                        continue;
                if (sweep && lazy_sweep) {
-                       sweep_block (block, FALSE);
-                       SGEN_ASSERT (0, block->swept, "Block must be swept after sweeping");
+                       sweep_block (block);
+                       SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEPT, "Block must be swept after sweeping");
                }
 
                for (i = 0; i < count; ++i) {
                        void **obj = (void**) MS_BLOCK_OBJ (block, i);
-                       if (!block->swept) {
+                       /*
+                        * We've finished sweep checking, but if we're sweeping lazily and
+                        * the flags don't require us to sweep, the block might still need
+                        * sweeping.  In that case, we need to consult the mark bits to tell
+                        * us whether an object slot is live.
+                        */
+                       if (!block_is_swept_or_marking (block)) {
                                int word, bit;
+                               SGEN_ASSERT (6, !sweep && block->state == BLOCK_STATE_NEED_SWEEPING, "Has sweeping not finished?");
                                MS_CALC_MARK_BIT (word, bit, obj);
                                if (!MS_MARK_BIT (block, word, bit))
                                        continue;
@@ -690,7 +900,7 @@ major_iterate_objects (IterateObjectsFlags flags, IterateObjectCallbackFunc call
                        if (MS_OBJ_ALLOCED (obj, block))
                                callback ((char*)obj, block->obj_size, data);
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static gboolean
@@ -698,7 +908,7 @@ major_is_valid_object (char *object)
 {
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;
 
@@ -710,7 +920,7 @@ major_is_valid_object (char *object)
                if (obj != object)
                        return FALSE;
                return MS_OBJ_ALLOCED (obj, block);
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        return FALSE;
 }
@@ -721,7 +931,7 @@ major_describe_pointer (char *ptr)
 {
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int idx;
                char *obj;
                gboolean live;
@@ -759,7 +969,7 @@ major_describe_pointer (char *ptr)
                SGEN_LOG (0, " marked %d)\n", marked ? 1 : 0);
 
                return vtable;
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        return NULL;
 }
@@ -780,7 +990,7 @@ major_dump_heap (FILE *heap_dump_file)
        for (i = 0; i < num_block_obj_sizes; ++i)
                slots_available [i] = slots_used [i] = 0;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int index = ms_find_block_obj_size_index (block->obj_size);
                int count = MS_BLOCK_FREE / block->obj_size;
 
@@ -789,7 +999,7 @@ major_dump_heap (FILE *heap_dump_file)
                        if (MS_OBJ_ALLOCED (MS_BLOCK_OBJ (block, i), block))
                                ++slots_used [index];
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        fprintf (heap_dump_file, "<occupancies>\n");
        for (i = 0; i < num_block_obj_sizes; ++i) {
@@ -798,7 +1008,7 @@ major_dump_heap (FILE *heap_dump_file)
        }
        fprintf (heap_dump_file, "</occupancies>\n");
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                int i;
                int start = -1;
@@ -818,7 +1028,7 @@ major_dump_heap (FILE *heap_dump_file)
                }
 
                fprintf (heap_dump_file, "</section>\n");
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 #define LOAD_VTABLE    SGEN_LOAD_VTABLE
@@ -866,7 +1076,7 @@ static void
 major_copy_or_mark_object_with_evacuation_concurrent (void **ptr, void *obj, SgenGrayQueue *queue)
 {
        SGEN_ASSERT (9, sgen_concurrent_collection_in_progress (), "Why are we scanning concurrently when there's no concurrent collection on?");
-       SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_is_worker_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
+       SGEN_ASSERT (9, !sgen_workers_are_working () || sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "We must not scan from two threads at the same time!");
 
        g_assert (!SGEN_OBJECT_IS_FORWARDED (obj));
 
@@ -1045,22 +1255,56 @@ sweep_block_for_size (MSBlockInfo *block, int count, int obj_size)
        }
 }
 
+static inline gboolean
+try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
+{
+       gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state);
+       gboolean success = old_state == expected_state;
+       if (success)
+               binary_protocol_block_set_state (block, MS_BLOCK_SIZE, old_state, new_state);
+       return success;
+}
+
+static inline void
+set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state)
+{
+       SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set");
+       block->state = new_state;
+}
+
 /*
- * sweep_block:
+ * If `block` needs sweeping, sweep it and return TRUE.  Otherwise return FALSE.
  *
- *   Traverse BLOCK, freeing and zeroing unused objects.
+ * Sweeping means iterating through the block's slots and building the free-list from the
+ * unmarked ones.  They will also be zeroed.  The mark bits will be reset.
  */
-static void
-sweep_block (MSBlockInfo *block, gboolean during_major_collection)
+static gboolean
+sweep_block (MSBlockInfo *block)
 {
        int count;
        void *reversed = NULL;
 
-       if (!during_major_collection)
-               g_assert (!sgen_concurrent_collection_in_progress ());
+ retry:
+       switch (block->state) {
+       case BLOCK_STATE_SWEPT:
+               return FALSE;
+       case BLOCK_STATE_MARKING:
+       case BLOCK_STATE_CHECKING:
+               SGEN_ASSERT (0, FALSE, "How did we get to sweep a block that's being marked or being checked?");
+               goto retry;
+       case BLOCK_STATE_SWEEPING:
+               /* FIXME: Do this more elegantly */
+               g_usleep (100);
+               goto retry;
+       case BLOCK_STATE_NEED_SWEEPING:
+               if (!try_set_block_state (block, BLOCK_STATE_SWEEPING, BLOCK_STATE_NEED_SWEEPING))
+                       goto retry;
+               break;
+       default:
+               SGEN_ASSERT (0, FALSE, "Illegal block state");
+       }
 
-       if (block->swept)
-               return;
+       SGEN_ASSERT (6, block->state == BLOCK_STATE_SWEEPING, "How did we get here without setting state to sweeping?");
 
        count = MS_BLOCK_FREE / block->obj_size;
 
@@ -1090,7 +1334,11 @@ sweep_block (MSBlockInfo *block, gboolean during_major_collection)
        }
        block->free_list = reversed;
 
-       block->swept = 1;
+       mono_memory_write_barrier ();
+
+       set_block_state (block, BLOCK_STATE_SWEPT, BLOCK_STATE_SWEEPING);
+
+       return TRUE;
 }
 
 static inline int
@@ -1112,105 +1360,237 @@ bitcount (mword d)
        return count;
 }
 
+/* statistics for evacuation */
+static size_t *sweep_slots_available;
+static size_t *sweep_slots_used;
+static size_t *sweep_num_blocks;
+
+static volatile size_t num_major_sections_before_sweep;
+static volatile size_t num_major_sections_freed_in_sweep;
+
 static void
-major_sweep (void)
+sweep_start (void)
 {
        int i;
-       MSBlockInfo *block;
-
-       /* statistics for evacuation */
-       int *slots_available = alloca (sizeof (int) * num_block_obj_sizes);
-       int *slots_used = alloca (sizeof (int) * num_block_obj_sizes);
-       int *num_blocks = alloca (sizeof (int) * num_block_obj_sizes);
-
-       mword total_evacuate_heap = 0;
-       mword total_evacuate_saved = 0;
 
        for (i = 0; i < num_block_obj_sizes; ++i)
-               slots_available [i] = slots_used [i] = num_blocks [i] = 0;
+               sweep_slots_available [i] = sweep_slots_used [i] = sweep_num_blocks [i] = 0;
 
        /* clear all the free lists */
        for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i) {
-               MSBlockInfo **free_blocks = free_block_lists [i];
+               MSBlockInfo * volatile *free_blocks = free_block_lists [i];
                int j;
                for (j = 0; j < num_block_obj_sizes; ++j)
                        free_blocks [j] = NULL;
        }
+}
 
-       /* traverse all blocks, free and zero unmarked objects */
-       FOREACH_BLOCK (block) {
-               int count;
-               gboolean have_live = FALSE;
-               gboolean has_pinned;
-               gboolean have_free = FALSE;
-               int obj_size_index;
-               int nused = 0;
+static void sweep_finish (void);
 
-               obj_size_index = block->obj_size_index;
+/*
+ * If `wait` is TRUE and the block is currently being checked, this function waits until
+ * the checking has finished.
+ *
+ * Returns whether the block is still present.  If `wait` is FALSE, the return value is
+ * not reliable and must not be used.
+ */
+static gboolean
+ensure_block_is_checked_for_sweeping (int block_index, gboolean wait, gboolean *have_checked)
+{
+       int count;
+       gboolean have_live = FALSE;
+       gboolean have_free = FALSE;
+       int nused = 0;
+       int block_state;
+       int i;
+       void *tagged_block;
+       MSBlockInfo *block;
+
+       SGEN_ASSERT (6, sweep_in_progress (), "Why do we call this function if there's no sweep in progress?");
+
+       if (have_checked)
+               *have_checked = FALSE;
 
-               has_pinned = block->has_pinned;
-               block->has_pinned = block->pinned;
+ retry:
+       tagged_block = *(void * volatile *)&allocated_blocks.data [block_index];
+       if (!tagged_block)
+               return FALSE;
 
-               block->is_to_space = FALSE;
-               block->swept = 0;
+       if (BLOCK_IS_TAGGED_CHECKING (tagged_block)) {
+               if (!wait)
+                       return FALSE;
+               /* FIXME: do this more elegantly */
+               g_usleep (100);
+               goto retry;
+       }
 
-               count = MS_BLOCK_FREE / block->obj_size;
+       if (SGEN_CAS_PTR (&allocated_blocks.data [block_index], BLOCK_TAG_CHECKING (tagged_block), tagged_block) != tagged_block)
+               goto retry;
 
-               if (block->cardtable_mod_union) {
-                       sgen_free_internal_dynamic (block->cardtable_mod_union, CARDS_PER_BLOCK, INTERNAL_MEM_CARDTABLE_MOD_UNION);
-                       block->cardtable_mod_union = NULL;
-               }
+       block = BLOCK_UNTAG (tagged_block);
+       block_state = block->state;
 
-               /* Count marked objects in the block */
-               for (i = 0; i < MS_NUM_MARK_WORDS; ++i) {
-                       nused += bitcount (block->mark_words [i]);
+       if (!sweep_in_progress ()) {
+               SGEN_ASSERT (6, block_state != BLOCK_STATE_SWEEPING && block_state != BLOCK_STATE_CHECKING, "Invalid block state.");
+               if (!lazy_sweep)
+                       SGEN_ASSERT (6, block_state != BLOCK_STATE_NEED_SWEEPING, "Invalid block state.");
+       }
+
+       switch (block_state) {
+       case BLOCK_STATE_SWEPT:
+       case BLOCK_STATE_NEED_SWEEPING:
+       case BLOCK_STATE_SWEEPING:
+               goto done;
+       case BLOCK_STATE_MARKING:
+               break;
+       case BLOCK_STATE_CHECKING:
+               SGEN_ASSERT (0, FALSE, "We set the CHECKING bit - how can the stage be CHECKING?");
+               goto done;
+       default:
+               SGEN_ASSERT (0, FALSE, "Illegal block state");
+               break;
+       }
+
+       SGEN_ASSERT (6, block->state == BLOCK_STATE_MARKING, "When we sweep all blocks must start out marking.");
+       set_block_state (block, BLOCK_STATE_CHECKING, BLOCK_STATE_MARKING);
+
+       if (have_checked)
+               *have_checked = TRUE;
+
+       block->has_pinned = block->pinned;
+
+       block->is_to_space = FALSE;
+
+       count = MS_BLOCK_FREE / block->obj_size;
+
+       if (block->cardtable_mod_union) {
+               sgen_free_internal_dynamic (block->cardtable_mod_union, CARDS_PER_BLOCK, INTERNAL_MEM_CARDTABLE_MOD_UNION);
+               block->cardtable_mod_union = NULL;
+       }
+
+       /* Count marked objects in the block */
+       for (i = 0; i < MS_NUM_MARK_WORDS; ++i)
+               nused += bitcount (block->mark_words [i]);
+
+       if (nused)
+               have_live = TRUE;
+       if (nused < count)
+               have_free = TRUE;
+
+       if (have_live) {
+               int obj_size_index = block->obj_size_index;
+               gboolean has_pinned = block->has_pinned;
+
+               set_block_state (block, BLOCK_STATE_NEED_SWEEPING, BLOCK_STATE_CHECKING);
+
+               /*
+                * FIXME: Go straight to SWEPT if there are no free slots.  We need
+                * to set the free slot list to NULL, though, and maybe update some
+                * statistics.
+                */
+               if (!lazy_sweep)
+                       sweep_block (block);
+
+               if (!has_pinned) {
+                       ++sweep_num_blocks [obj_size_index];
+                       sweep_slots_used [obj_size_index] += nused;
+                       sweep_slots_available [obj_size_index] += count;
                }
-               if (nused) {
-                       have_live = TRUE;
+
+               /*
+                * If there are free slots in the block, add
+                * the block to the corresponding free list.
+                */
+               if (have_free) {
+                       MSBlockInfo * volatile *free_blocks = FREE_BLOCKS (block->pinned, block->has_references);
+
+                       if (!lazy_sweep)
+                               SGEN_ASSERT (6, block->free_list, "How do we not have a free list when there are free slots?");
+
+                       add_free_block (free_blocks, obj_size_index, block);
                }
-               if (nused < count)
-                       have_free = TRUE;
 
-               if (!lazy_sweep)
-                       sweep_block (block, TRUE);
+               /* FIXME: Do we need the heap boundaries while we do nursery collections? */
+               update_heap_boundaries_for_block (block);
+       } else {
+               /*
+                * Blocks without live objects are removed from the
+                * block list and freed.
+                */
+               SGEN_ASSERT (6, block_index < allocated_blocks.next_slot, "How did the number of blocks shrink?");
+               SGEN_ASSERT (6, allocated_blocks.data [block_index] == BLOCK_TAG_CHECKING (tagged_block), "How did the block move?");
 
-               if (have_live) {
-                       if (!has_pinned) {
-                               ++num_blocks [obj_size_index];
-                               slots_used [obj_size_index] += nused;
-                               slots_available [obj_size_index] += count;
-                       }
+               binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
+               ms_free_block (block);
 
-                       /*
-                        * If there are free slots in the block, add
-                        * the block to the corresponding free list.
-                        */
-                       if (have_free) {
-                               MSBlockInfo **free_blocks = FREE_BLOCKS (block->pinned, block->has_references);
-                               int index = MS_BLOCK_OBJ_SIZE_INDEX (block->obj_size);
-                               block->next_free = free_blocks [index];
-                               free_blocks [index] = block;
-                       }
+               SGEN_ATOMIC_ADD_P (num_major_sections, -1);
 
-                       update_heap_boundaries_for_block (block);
-               } else {
-                       /*
-                        * Blocks without live objects are removed from the
-                        * block list and freed.
-                        */
-                       DELETE_BLOCK_IN_FOREACH ();
+               tagged_block = NULL;
+       }
 
-                       binary_protocol_empty (MS_BLOCK_OBJ (block, 0), (char*)MS_BLOCK_OBJ (block, count) - (char*)MS_BLOCK_OBJ (block, 0));
-                       ms_free_block (block);
+ done:
+       allocated_blocks.data [block_index] = tagged_block;
+       return !!tagged_block;
+}
 
-                       --num_major_sections;
+static void
+sweep_job_func (void *thread_data_untyped, SgenThreadPoolJob *job)
+{
+       int block_index;
+       int num_blocks = num_major_sections_before_sweep;
+
+       SGEN_ASSERT (0, sweep_in_progress (), "Sweep thread called with wrong state");
+       SGEN_ASSERT (0, num_blocks <= allocated_blocks.next_slot, "How did we lose blocks?");
+
+       /*
+        * We traverse the block array from high to low.  Nursery collections will have to
+        * cooperate with the sweep thread to finish sweeping, and they will traverse from
+        * low to high, to avoid constantly colliding on the same blocks.
+        */
+       for (block_index = num_blocks - 1; block_index >= 0; --block_index) {
+               gboolean have_checked;
+
+               /*
+                * The block might have been freed by another thread doing some checking
+                * work.
+                */
+               if (!ensure_block_is_checked_for_sweeping (block_index, TRUE, &have_checked))
+                       ++num_major_sections_freed_in_sweep;
+       }
+
+       while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING)) {
+               /*
+                * The main GC thread is currently iterating over the block array to help us
+                * finish the sweep.  We have already finished, but we don't want to mess up
+                * that iteration, so we just wait for it.
+                */
+               g_usleep (100);
+       }
+
+       if (SGEN_MAX_ASSERT_LEVEL >= 6) {
+               for (block_index = num_blocks; block_index < allocated_blocks.next_slot; ++block_index) {
+                       MSBlockInfo *block = BLOCK_UNTAG (allocated_blocks.data [block_index]);
+                       SGEN_ASSERT (6, block && block->state == BLOCK_STATE_SWEPT, "How did a new block to be swept get added while swept?");
                }
-       } END_FOREACH_BLOCK;
+       }
+
        sgen_pointer_queue_remove_nulls (&allocated_blocks);
 
+       sweep_finish ();
+
+       sweep_job = NULL;
+}
+
+static void
+sweep_finish (void)
+{
+       mword total_evacuate_heap = 0;
+       mword total_evacuate_saved = 0;
+       int i;
+
        for (i = 0; i < num_block_obj_sizes; ++i) {
-               float usage = (float)slots_used [i] / (float)slots_available [i];
-               if (num_blocks [i] > 5 && usage < evacuation_threshold) {
+               float usage = (float)sweep_slots_used [i] / (float)sweep_slots_available [i];
+               if (sweep_num_blocks [i] > 5 && usage < evacuation_threshold) {
                        evacuate_block_obj_sizes [i] = TRUE;
                        /*
                        g_print ("slot size %d - %d of %d used\n",
@@ -1220,22 +1600,43 @@ major_sweep (void)
                        evacuate_block_obj_sizes [i] = FALSE;
                }
                {
-                       mword total_bytes = block_obj_sizes [i] * slots_available [i];
+                       mword total_bytes = block_obj_sizes [i] * sweep_slots_available [i];
                        total_evacuate_heap += total_bytes;
                        if (evacuate_block_obj_sizes [i])
-                               total_evacuate_saved += total_bytes - block_obj_sizes [i] * slots_used [i];
+                               total_evacuate_saved += total_bytes - block_obj_sizes [i] * sweep_slots_used [i];
                }
        }
 
        want_evacuation = (float)total_evacuate_saved / (float)total_evacuate_heap > (1 - concurrent_evacuation_threshold);
 
-       have_swept = TRUE;
+       set_sweep_state (SWEEP_STATE_SWEPT, SWEEP_STATE_COMPACTING);
+}
+
+static void
+major_sweep (void)
+{
+       set_sweep_state (SWEEP_STATE_SWEEPING, SWEEP_STATE_NEED_SWEEPING);
+
+       sweep_start ();
+
+       SGEN_ASSERT (0, num_major_sections == allocated_blocks.next_slot, "We don't know how many blocks we have?");
+
+       num_major_sections_before_sweep = num_major_sections;
+       num_major_sections_freed_in_sweep = 0;
+
+       SGEN_ASSERT (0, !sweep_job, "We haven't finished the last sweep?");
+       if (concurrent_sweep) {
+               sweep_job = sgen_thread_pool_job_alloc ("sweep", sweep_job_func, sizeof (SgenThreadPoolJob));
+               sgen_thread_pool_job_enqueue (sweep_job);
+       } else {
+               sweep_job_func (NULL, NULL);
+       }
 }
 
 static gboolean
-major_have_finished_sweeping (void)
+major_have_swept (void)
 {
-       return have_swept;
+       return sweep_state == SWEEP_STATE_SWEPT;
 }
 
 static int count_pinned_ref;
@@ -1343,15 +1744,20 @@ major_finish_nursery_collection (void)
 #ifdef MARKSWEEP_CONSISTENCY_CHECK
        consistency_check ();
 #endif
-       sgen_register_major_sections_alloced (num_major_sections - old_num_major_sections);
 }
 
 static void
 major_start_major_collection (void)
 {
+       MSBlockInfo *block;
        int i;
 
-       /* clear the free lists */
+       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Major collection on unswept heap");
+
+       /*
+        * Clear the free lists for block sizes where we do evacuation.  For those block
+        * sizes we will have to allocate new blocks.
+        */
        for (i = 0; i < num_block_obj_sizes; ++i) {
                if (!evacuate_block_obj_sizes [i])
                        continue;
@@ -1360,21 +1766,21 @@ major_start_major_collection (void)
                free_block_lists [MS_BLOCK_FLAG_REFS][i] = NULL;
        }
 
-       // Sweep all unswept blocks
-       if (lazy_sweep) {
-               MSBlockInfo *block;
-
+       if (lazy_sweep)
                MONO_GC_SWEEP_BEGIN (GENERATION_OLD, TRUE);
 
-               FOREACH_BLOCK (block) {
-                       sweep_block (block, TRUE);
-               } END_FOREACH_BLOCK;
+       /* Sweep all unswept blocks and set them to MARKING */
+       FOREACH_BLOCK_NO_LOCK (block) {
+               if (lazy_sweep)
+                       sweep_block (block);
+               SGEN_ASSERT (0, block->state == BLOCK_STATE_SWEPT, "All blocks must be swept when we're pinning.");
+               set_block_state (block, BLOCK_STATE_MARKING, BLOCK_STATE_SWEPT);
+       } END_FOREACH_BLOCK_NO_LOCK;
 
+       if (lazy_sweep)
                MONO_GC_SWEEP_END (GENERATION_OLD, TRUE);
-       }
 
-       SGEN_ASSERT (0, have_swept, "Cannot start major collection without having finished sweeping");
-       have_swept = FALSE;
+       set_sweep_state (SWEEP_STATE_NEED_SWEEPING, SWEEP_STATE_SWEPT);
 }
 
 static void
@@ -1404,12 +1810,16 @@ compare_pointers (const void *va, const void *vb) {
 }
 #endif
 
+/*
+ * This is called with sweep completed and the world stopped.
+ */
 static void
-major_free_swept_blocks (void)
+major_free_swept_blocks (size_t allowance)
 {
-       size_t section_reserve = sgen_get_minor_collection_allowance () / MS_BLOCK_SIZE;
+       /* FIXME: This is probably too much.  It's assuming all objects are small. */
+       size_t section_reserve = allowance / MS_BLOCK_SIZE;
 
-       g_assert (have_swept);
+       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Sweeping must have finished before freeing blocks");
 
 #if SIZEOF_VOID_P != 8
        {
@@ -1460,7 +1870,7 @@ major_free_swept_blocks (void)
                        for (i = 0; i < arr_length; ++i) {
                                int d = dest;
                                void *block = empty_block_arr [i];
-                               SGEN_ASSERT (0, block, "we're not shifting correctly");
+                               SGEN_ASSERT (6, block, "we're not shifting correctly");
                                if (i != dest) {
                                        empty_block_arr [dest] = block;
                                        /*
@@ -1476,7 +1886,7 @@ major_free_swept_blocks (void)
                                        continue;
                                }
 
-                               SGEN_ASSERT (0, first >= 0 && d > first, "algorithm is wrong");
+                               SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong");
 
                                if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) {
                                        first = d;
@@ -1509,9 +1919,9 @@ major_free_swept_blocks (void)
                                }
                        }
 
-                       SGEN_ASSERT (0, dest <= i && dest <= arr_length, "array length is off");
+                       SGEN_ASSERT (6, dest <= i && dest <= arr_length, "array length is off");
                        arr_length = dest;
-                       SGEN_ASSERT (0, arr_length == num_empty_blocks, "array length is off");
+                       SGEN_ASSERT (6, arr_length == num_empty_blocks, "array length is off");
 
                        num_blocks >>= 1;
                }
@@ -1520,7 +1930,7 @@ major_free_swept_blocks (void)
                rebuild_next = (void**)&empty_blocks;
                for (i = 0; i < arr_length; ++i) {
                        void *block = empty_block_arr [i];
-                       SGEN_ASSERT (0, block, "we're missing blocks");
+                       SGEN_ASSERT (6, block, "we're missing blocks");
                        *rebuild_next = block;
                        rebuild_next = (void**)block;
                }
@@ -1563,13 +1973,13 @@ major_pin_objects (SgenGrayQueue *queue)
 {
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                size_t first_entry, last_entry;
-               SGEN_ASSERT (0, block->swept, "All blocks must be swept when we're pinning.");
+               SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning.");
                sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE,
                                &first_entry, &last_entry);
                mark_pinned_objects_in_block (block, first_entry, last_entry, queue);
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static void
@@ -1589,23 +1999,42 @@ major_get_used_size (void)
        gint64 size = 0;
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       /*
+        * We're holding the GC lock, but the sweep thread might be running.  Make sure it's
+        * finished, then we can iterate over the block array.
+        */
+       major_finish_sweep_checking ();
+
+       FOREACH_BLOCK_NO_LOCK_CONDITION (TRUE, block) {
                int count = MS_BLOCK_FREE / block->obj_size;
                void **iter;
                size += count * block->obj_size;
                for (iter = block->free_list; iter; iter = (void**)*iter)
                        size -= block->obj_size;
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        return size;
 }
 
+/* FIXME: return number of bytes, not of sections */
 static size_t
 get_num_major_sections (void)
 {
        return num_major_sections;
 }
 
+/*
+ * Returns the number of bytes in blocks that were present when the last sweep was
+ * initiated, and were not freed during the sweep.  They are the basis for calculating the
+ * allowance.
+ */
+static size_t
+get_bytes_survived_last_sweep (void)
+{
+       SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep");
+       return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * MS_BLOCK_SIZE;
+}
+
 static gboolean
 major_handle_gc_param (const char *opt)
 {
@@ -1624,6 +2053,12 @@ major_handle_gc_param (const char *opt)
        } else if (!strcmp (opt, "no-lazy-sweep")) {
                lazy_sweep = FALSE;
                return TRUE;
+       } else if (!strcmp (opt, "concurrent-sweep")) {
+               concurrent_sweep = TRUE;
+               return TRUE;
+       } else if (!strcmp (opt, "no-concurrent-sweep")) {
+               concurrent_sweep = FALSE;
+               return TRUE;
        }
 
        return FALSE;
@@ -1636,19 +2071,24 @@ major_print_gc_param_usage (void)
                        ""
                        "  evacuation-threshold=P (where P is a percentage, an integer in 0-100)\n"
                        "  (no-)lazy-sweep\n"
+                       "  (no-)concurrent-sweep\n"
                        );
 }
 
+/*
+ * This callback is used to clear cards, move cards to the shadow table and do counting.
+ */
 static void
 major_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
 {
        MSBlockInfo *block;
        gboolean has_references;
 
-       FOREACH_BLOCK_HAS_REFERENCES (block, has_references) {
+       major_finish_sweep_checking ();
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
                if (has_references)
                        callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE);
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 #ifdef HEAVY_STATISTICS
@@ -1705,146 +2145,154 @@ card_offset (char *obj, char *base)
 }
 
 static void
-major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
+scan_card_table_for_block (MSBlockInfo *block, gboolean mod_union, ScanObjectFunc scan_func, SgenGrayQueue *queue)
 {
-       MSBlockInfo *block;
-       gboolean has_references;
-       ScanObjectFunc scan_func = sgen_get_current_object_ops ()->scan_object;
-
-       if (!concurrent_mark)
-               g_assert (!mod_union);
-
-       FOREACH_BLOCK_HAS_REFERENCES (block, has_references) {
 #ifndef SGEN_HAVE_OVERLAPPING_CARDS
-               guint8 cards_copy [CARDS_PER_BLOCK];
+       guint8 cards_copy [CARDS_PER_BLOCK];
 #endif
-               gboolean small_objects;
-               int block_obj_size;
-               char *block_start;
-               guint8 *card_data, *card_base;
-               guint8 *card_data_end;
-               char *scan_front = NULL;
+       gboolean small_objects;
+       int block_obj_size;
+       char *block_start;
+       guint8 *card_data, *card_base;
+       guint8 *card_data_end;
+       char *scan_front = NULL;
 
-#ifdef PREFETCH_CARDS
-               int prefetch_index = __index + 6;
-               if (prefetch_index < allocated_blocks.next_slot) {
-                       MSBlockInfo *prefetch_block = BLOCK_UNTAG_HAS_REFERENCES (allocated_blocks.data [prefetch_index]);
-                       guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
-                       PREFETCH_READ (prefetch_block);
-                       PREFETCH_WRITE (prefetch_cards);
-                       PREFETCH_WRITE (prefetch_cards + 32);
-                }
-#endif
+       block_obj_size = block->obj_size;
+       small_objects = block_obj_size < CARD_SIZE_IN_BYTES;
 
-               if (!has_references)
-                       continue;
-
-               block_obj_size = block->obj_size;
-               small_objects = block_obj_size < CARD_SIZE_IN_BYTES;
-
-               block_start = MS_BLOCK_FOR_BLOCK_INFO (block);
+       block_start = MS_BLOCK_FOR_BLOCK_INFO (block);
 
+	/*
+	 * This is safe in the face of card aliasing for the following reason:
+	 *
+	 * Major blocks are 16k aligned, or 32 cards aligned.
+	 * Card aliasing happens in powers of two, so as long as major blocks are aligned to their
+	 * sizes, they won't overflow the cardtable overlap modulus.
+	 */
+       if (mod_union) {
+               card_data = card_base = block->cardtable_mod_union;
                /*
-                * This is safe in face of card aliasing for the following reason:
-                *
-                * Major blocks are 16k aligned, or 32 cards aligned.
-                * Cards aliasing happens in powers of two, so as long as major blocks are aligned to their
-                * sizes, they won't overflow the cardtable overlap modulus.
+                * This happens when the nursery collection that precedes finishing
+                * the concurrent collection allocates new major blocks.
                 */
-               if (mod_union) {
-                       card_data = card_base = block->cardtable_mod_union;
-                       /*
-                        * This happens when the nursery collection that precedes finishing
-                        * the concurrent collection allocates new major blocks.
-                        */
-                       if (!card_data)
-                               continue;
-               } else {
+               if (!card_data)
+                       return;
+       } else {
 #ifdef SGEN_HAVE_OVERLAPPING_CARDS
-                       card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
+               card_data = card_base = sgen_card_table_get_card_scan_address ((mword)block_start);
 #else
-                       if (!sgen_card_table_get_card_data (cards_copy, (mword)block_start, CARDS_PER_BLOCK))
-                               continue;
-                       card_data = card_base = cards_copy;
+               if (!sgen_card_table_get_card_data (cards_copy, (mword)block_start, CARDS_PER_BLOCK))
+                       return;
+               card_data = card_base = cards_copy;
 #endif
-               }
-               card_data_end = card_data + CARDS_PER_BLOCK;
+       }
+       card_data_end = card_data + CARDS_PER_BLOCK;
 
-               card_data += MS_BLOCK_SKIP >> CARD_BITS;
+       card_data += MS_BLOCK_SKIP >> CARD_BITS;
 
-               card_data = initial_skip_card (card_data);
-               while (card_data < card_data_end) {
-                       size_t card_index, first_object_index;
-                       char *start;
-                       char *end;
-                       char *first_obj, *obj;
+       card_data = initial_skip_card (card_data);
+       while (card_data < card_data_end) {
+               size_t card_index, first_object_index;
+               char *start;
+               char *end;
+               char *first_obj, *obj;
 
-                       HEAVY_STAT (++scanned_cards);
+               HEAVY_STAT (++scanned_cards);
 
-                       if (!*card_data) {
-                               ++card_data;
-                               continue;
-                       }
+               if (!*card_data) {
+                       ++card_data;
+                       continue;
+               }
 
-                       card_index = card_data - card_base;
-                       start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
-                       end = start + CARD_SIZE_IN_BYTES;
+               card_index = card_data - card_base;
+               start = (char*)(block_start + card_index * CARD_SIZE_IN_BYTES);
+               end = start + CARD_SIZE_IN_BYTES;
 
-                       if (!block->swept)
-                               sweep_block (block, FALSE);
+               if (!block_is_swept_or_marking (block))
+                       sweep_block (block);
 
-                       HEAVY_STAT (++marked_cards);
+               HEAVY_STAT (++marked_cards);
 
-                       if (small_objects)
-                               sgen_card_table_prepare_card_for_scanning (card_data);
+               if (small_objects)
+                       sgen_card_table_prepare_card_for_scanning (card_data);
 
-                       /*
-                        * If the card we're looking at starts at or in the block header, we
-                        * must start at the first object in the block, without calculating
-                        * the index of the object we're hypothetically starting at, because
-                        * it would be negative.
-                        */
-                       if (card_index <= (MS_BLOCK_SKIP >> CARD_BITS))
-                               first_object_index = 0;
-                       else
-                               first_object_index = MS_BLOCK_OBJ_INDEX_FAST (start, block_start, block_obj_size);
+               /*
+                * If the card we're looking at starts at or in the block header, we
+                * must start at the first object in the block, without calculating
+                * the index of the object we're hypothetically starting at, because
+                * it would be negative.
+                */
+               if (card_index <= (MS_BLOCK_SKIP >> CARD_BITS))
+                       first_object_index = 0;
+               else
+                       first_object_index = MS_BLOCK_OBJ_INDEX_FAST (start, block_start, block_obj_size);
 
-                       obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, first_object_index);
+               obj = first_obj = (char*)MS_BLOCK_OBJ_FAST (block_start, block_obj_size, first_object_index);
 
-                       while (obj < end) {
-                               if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
-                                       goto next_object;
+               binary_protocol_card_scan (first_obj, end - first_obj);
 
-                               if (mod_union) {
-                                       /* FIXME: do this more efficiently */
-                                       int w, b;
-                                       MS_CALC_MARK_BIT (w, b, obj);
-                                       if (!MS_MARK_BIT (block, w, b))
-                                               goto next_object;
-                               }
+               while (obj < end) {
+                       if (obj < scan_front || !MS_OBJ_ALLOCED_FAST (obj, block_start))
+                               goto next_object;
 
-                               if (small_objects) {
-                                       HEAVY_STAT (++scanned_objects);
-                                       scan_func (obj, sgen_obj_get_descriptor (obj), queue);
-                               } else {
-                                       size_t offset = card_offset (obj, block_start);
-                                       sgen_cardtable_scan_object (obj, block_obj_size, card_base + offset, mod_union, queue);
-                               }
-                       next_object:
-                               obj += block_obj_size;
-                               g_assert (scan_front <= obj);
-                               scan_front = obj;
+                       if (mod_union) {
+                               /* FIXME: do this more efficiently */
+                               int w, b;
+                               MS_CALC_MARK_BIT (w, b, obj);
+                               if (!MS_MARK_BIT (block, w, b))
+                                       goto next_object;
                        }
 
-                       HEAVY_STAT (if (*card_data) ++remarked_cards);
-                       binary_protocol_card_scan (first_obj, obj - first_obj);
-
-                       if (small_objects)
-                               ++card_data;
-                       else
-                               card_data = card_base + card_offset (obj, block_start);
+                       if (small_objects) {
+                               HEAVY_STAT (++scanned_objects);
+                               scan_func (obj, sgen_obj_get_descriptor (obj), queue);
+                       } else {
+                               size_t offset = card_offset (obj, block_start);
+                               sgen_cardtable_scan_object (obj, block_obj_size, card_base + offset, mod_union, queue);
+                       }
+               next_object:
+                       obj += block_obj_size;
+                       g_assert (scan_front <= obj);
+                       scan_front = obj;
                }
-       } END_FOREACH_BLOCK;
+
+               HEAVY_STAT (if (*card_data) ++remarked_cards);
+
+               if (small_objects)
+                       ++card_data;
+               else
+                       card_data = card_base + card_offset (obj, block_start);
+       }
+}
+
+static void
+major_scan_card_table (gboolean mod_union, SgenGrayQueue *queue)
+{
+       ScanObjectFunc scan_func = sgen_get_current_object_ops ()->scan_object;
+       MSBlockInfo *block;
+       gboolean has_references;
+
+       if (!concurrent_mark)
+               g_assert (!mod_union);
+
+       major_finish_sweep_checking ();
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
+#ifdef PREFETCH_CARDS
+               int prefetch_index = __index + 6;
+               if (prefetch_index < allocated_blocks.next_slot) {
+                       MSBlockInfo *prefetch_block = BLOCK_UNTAG (allocated_blocks.data [prefetch_index]);
+                       guint8 *prefetch_cards = sgen_card_table_get_card_scan_address ((mword)MS_BLOCK_FOR_BLOCK_INFO (prefetch_block));
+                       PREFETCH_READ (prefetch_block);
+                       PREFETCH_WRITE (prefetch_cards);
+                       PREFETCH_WRITE (prefetch_cards + 32);
+                }
+#endif
+
+               if (!has_references)
+                       continue;
+
+               scan_card_table_for_block (block, mod_union, scan_func, queue);
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static void
@@ -1855,7 +2303,13 @@ major_count_cards (long long *num_total_cards, long long *num_marked_cards)
        long long total_cards = 0;
        long long marked_cards = 0;
 
-       FOREACH_BLOCK_HAS_REFERENCES (block, has_references) {
+       if (sweep_in_progress ()) {
+               *num_total_cards = -1;
+               *num_marked_cards = -1;
+               return;
+       }
+
+       FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
                guint8 *cards = sgen_card_table_get_card_scan_address ((mword) MS_BLOCK_FOR_BLOCK_INFO (block));
                int i;
 
@@ -1867,7 +2321,7 @@ major_count_cards (long long *num_total_cards, long long *num_marked_cards)
                        if (cards [i])
                                ++marked_cards;
                }
-       } END_FOREACH_BLOCK;
+       } END_FOREACH_BLOCK_NO_LOCK;
 
        *num_total_cards = total_cards;
        *num_marked_cards = marked_cards;
@@ -1878,14 +2332,14 @@ update_cardtable_mod_union (void)
 {
        MSBlockInfo *block;
 
-       FOREACH_BLOCK (block) {
+       FOREACH_BLOCK_NO_LOCK (block) {
                size_t num_cards;
 
                block->cardtable_mod_union = sgen_card_table_update_mod_union (block->cardtable_mod_union,
                                MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards);
 
-               SGEN_ASSERT (0, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
-       } END_FOREACH_BLOCK;
+               SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong");
+       } END_FOREACH_BLOCK_NO_LOCK;
 }
 
 static guint8*
@@ -1896,20 +2350,13 @@ major_get_cardtable_mod_union_for_object (char *obj)
        return &block->cardtable_mod_union [offset];
 }
 
-static void
-alloc_free_block_lists (MSBlockInfo ***lists)
-{
-       int i;
-       for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i)
-               lists [i] = sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
-}
-
 #undef pthread_create
 
 static void
 post_param_init (SgenMajorCollector *collector)
 {
        collector->sweeps_lazily = lazy_sweep;
+       collector->needs_thread_pool = concurrent_mark || concurrent_sweep;
 }
 
 static void
@@ -1927,6 +2374,10 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        for (i = 0; i < num_block_obj_sizes; ++i)
                evacuate_block_obj_sizes [i] = FALSE;
 
+       sweep_slots_available = sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
+       sweep_slots_used = sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
+       sweep_num_blocks = sgen_alloc_internal_dynamic (sizeof (size_t) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
+
        /*
        {
                int i;
@@ -1936,7 +2387,8 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        }
        */
 
-       alloc_free_block_lists (free_block_lists);
+       for (i = 0; i < MS_BLOCK_TYPE_MAX; ++i)
+               free_block_lists [i] = sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
 
        for (i = 0; i < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES; ++i)
                fast_block_obj_size_indexes [i] = ms_find_block_obj_size_index (i * 8);
@@ -1957,13 +2409,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->section_size = MAJOR_SECTION_SIZE;
 
        concurrent_mark = is_concurrent;
-       if (is_concurrent) {
-               collector->is_concurrent = TRUE;
+       collector->is_concurrent = is_concurrent;
+       collector->needs_thread_pool = is_concurrent || concurrent_sweep;
+       if (is_concurrent)
                collector->want_synchronous_collection = &want_evacuation;
-       } else {
-               collector->is_concurrent = FALSE;
+       else
                collector->want_synchronous_collection = NULL;
-       }
        collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
        collector->supports_cardtable = TRUE;
 
@@ -1986,7 +2437,8 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        }
        collector->init_to_space = major_init_to_space;
        collector->sweep = major_sweep;
-       collector->have_finished_sweeping = major_have_finished_sweeping;
+       collector->have_swept = major_have_swept;
+       collector->finish_sweeping = major_finish_sweep_checking;
        collector->free_swept_blocks = major_free_swept_blocks;
        collector->check_scan_starts = major_check_scan_starts;
        collector->dump_heap = major_dump_heap;
@@ -1999,6 +2451,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        collector->obj_is_from_pinned_alloc = obj_is_from_pinned_alloc;
        collector->report_pinned_memory_usage = major_report_pinned_memory_usage;
        collector->get_num_major_sections = get_num_major_sections;
+       collector->get_bytes_survived_last_sweep = get_bytes_survived_last_sweep;
        collector->handle_gc_param = major_handle_gc_param;
        collector->print_gc_param_usage = major_print_gc_param_usage;
        collector->post_param_init = post_param_init;
@@ -2015,7 +2468,6 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr
        }
 
 #if !defined (FIXED_HEAP) && !defined (SGEN_PARALLEL_MARK)
-       /* FIXME: this will not work with evacuation or the split nursery. */
        if (!is_concurrent)
                collector->drain_gray_stack = drain_gray_stack;
 
index 63b01840674b0f1136a648715cfad9fd98b9ab71..70cc6f0788329a1d2c27525d9bd5e2ba5162bec4 100644 (file)
@@ -29,6 +29,7 @@
 
 #include "metadata/sgen-gc.h"
 #include "metadata/sgen-memory-governor.h"
+#include "metadata/sgen-thread-pool.h"
 #include "metadata/mono-gc.h"
 
 #include "utils/mono-counters.h"
@@ -57,18 +58,15 @@ static gboolean debug_print_allowance = FALSE;
 
 /* use this to tune when to do a major/minor collection */
 static mword memory_pressure = 0;
-static mword minor_collection_allowance;
-static mword minor_collection_sections_alloced = 0;
+static mword major_collection_trigger_size;
 
 static mword last_major_num_sections = 0;
 static mword last_los_memory_usage = 0;
 
 static gboolean need_calculate_minor_collection_allowance;
 
-static mword last_collection_old_num_major_sections;
+/* The size of the LOS after the last major collection, after sweeping. */
 static mword last_collection_los_memory_usage = 0;
-static mword last_collection_old_los_memory_usage;
-static mword last_collection_los_memory_alloced;
 
 static mword sgen_memgov_available_free_space (void);
 
@@ -76,26 +74,16 @@ static mword sgen_memgov_available_free_space (void);
 /* GC trigger heuristics. */
 
 static void
-sgen_memgov_try_calculate_minor_collection_allowance (gboolean overwrite)
+sgen_memgov_calculate_minor_collection_allowance (void)
 {
-       size_t num_major_sections;
-       mword new_major, new_heap_size, allowance_target;
-
-       if (overwrite)
-               g_assert (need_calculate_minor_collection_allowance);
+       size_t new_major, new_heap_size, allowance_target, allowance;
 
        if (!need_calculate_minor_collection_allowance)
                return;
 
-       if (!major_collector.have_finished_sweeping ()) {
-               if (overwrite)
-                       minor_collection_allowance = MIN_MINOR_COLLECTION_ALLOWANCE;
-               return;
-       }
-
-       num_major_sections = major_collector.get_num_major_sections ();
+       SGEN_ASSERT (0, major_collector.have_swept (), "Can only calculate allowance if heap is swept");
 
-       new_major = num_major_sections * major_collector.section_size;
+       new_major = major_collector.get_bytes_survived_last_sweep ();
        new_heap_size = new_major + last_collection_los_memory_usage;
 
        /*
@@ -104,47 +92,55 @@ sgen_memgov_try_calculate_minor_collection_allowance (gboolean overwrite)
         */
        allowance_target = new_heap_size / 3;
 
-       minor_collection_allowance = MAX (allowance_target, MIN_MINOR_COLLECTION_ALLOWANCE);
+       allowance = MAX (allowance_target, MIN_MINOR_COLLECTION_ALLOWANCE);
 
-       if (new_heap_size + minor_collection_allowance > soft_heap_limit) {
+       if (new_heap_size + allowance > soft_heap_limit) {
                if (new_heap_size > soft_heap_limit)
-                       minor_collection_allowance = MIN_MINOR_COLLECTION_ALLOWANCE;
+                       allowance = MIN_MINOR_COLLECTION_ALLOWANCE;
                else
-                       minor_collection_allowance = MAX (soft_heap_limit - new_heap_size, MIN_MINOR_COLLECTION_ALLOWANCE);
-       }
-
-       if (debug_print_allowance) {
-               mword old_major = last_collection_old_num_major_sections * major_collector.section_size;
-
-               SGEN_LOG (1, "Before collection: %ld bytes (%ld major, %ld LOS)",
-                                 (long)(old_major + last_collection_old_los_memory_usage), (long)old_major, (long)last_collection_old_los_memory_usage);
-               SGEN_LOG (1, "After collection: %ld bytes (%ld major, %ld LOS)",
-                                 (long)new_heap_size, (long)new_major, (long)last_collection_los_memory_usage);
-               SGEN_LOG (1, "Allowance: %ld bytes", (long)minor_collection_allowance);
+                       allowance = MAX (soft_heap_limit - new_heap_size, MIN_MINOR_COLLECTION_ALLOWANCE);
        }
 
+       /* FIXME: Why is this here? */
        if (major_collector.free_swept_blocks)
-               major_collector.free_swept_blocks ();
+               major_collector.free_swept_blocks (allowance);
+
+       major_collection_trigger_size = new_heap_size + allowance;
 
        need_calculate_minor_collection_allowance = FALSE;
-}
 
+       if (debug_print_allowance) {
+               SGEN_LOG (0, "Surviving sweep: %ld bytes (%ld major, %ld LOS)", (long)new_heap_size, (long)new_major, (long)last_collection_los_memory_usage);
+               SGEN_LOG (0, "Allowance: %ld bytes", (long)allowance);
+               SGEN_LOG (0, "Trigger size: %ld bytes", (long)major_collection_trigger_size);
+       }
+}
 
 gboolean
 sgen_need_major_collection (mword space_needed)
 {
-       mword los_alloced;
+       size_t heap_size;
+
        if (sgen_concurrent_collection_in_progress ())
                return FALSE;
-       los_alloced = los_memory_usage - MIN (last_collection_los_memory_usage, los_memory_usage);
-       return (space_needed > sgen_memgov_available_free_space ()) ||
-               minor_collection_sections_alloced * major_collector.section_size + los_alloced > minor_collection_allowance;
+
+       /* FIXME: This is a cop-out.  We should have some way of figuring this out. */
+       if (!major_collector.have_swept ())
+               return FALSE;
+
+       if (space_needed > sgen_memgov_available_free_space ())
+               return TRUE;
+
+       sgen_memgov_calculate_minor_collection_allowance ();
+
+       heap_size = major_collector.get_num_major_sections () * major_collector.section_size + los_memory_usage;
+
+       return heap_size > major_collection_trigger_size;
 }
 
 void
 sgen_memgov_minor_collection_start (void)
 {
-       sgen_memgov_try_calculate_minor_collection_allowance (FALSE);
 }
 
 void
@@ -155,32 +151,22 @@ sgen_memgov_minor_collection_end (void)
 void
 sgen_memgov_major_collection_start (void)
 {
-       last_collection_old_num_major_sections = sgen_get_major_collector ()->get_num_major_sections ();
-
-       /*
-        * A domain could have been freed, resulting in
-        * los_memory_usage being less than last_collection_los_memory_usage.
-        */
-       last_collection_los_memory_alloced = los_memory_usage - MIN (last_collection_los_memory_usage, los_memory_usage);
-       last_collection_old_los_memory_usage = los_memory_usage;
-
        need_calculate_minor_collection_allowance = TRUE;
+
+       if (debug_print_allowance) {
+               SGEN_LOG (0, "Starting collection with heap size %ld bytes", (long)(major_collector.get_num_major_sections () * major_collector.section_size + los_memory_usage));
+       }
 }
 
 void
-sgen_memgov_major_collection_end (void)
+sgen_memgov_major_collection_end (gboolean forced)
 {
-       sgen_memgov_try_calculate_minor_collection_allowance (TRUE);
-
-       minor_collection_sections_alloced = 0;
        last_collection_los_memory_usage = los_memory_usage;
-}
 
-void
-sgen_memgov_collection_start (int generation)
-{
-       last_major_num_sections = major_collector.get_num_major_sections ();
-       last_los_memory_usage = los_memory_usage;
+       if (forced) {
+               sgen_get_major_collector ()->finish_sweeping ();
+               sgen_memgov_calculate_minor_collection_allowance ();
+       }
 }
 
 static void
@@ -214,6 +200,12 @@ log_timming (GGTimingInfo *info)
                        los_memory_usage / 1024);       
 }
 
+/* FIXME: Remove either these or the specialized ones above. */
+void
+sgen_memgov_collection_start (int generation)
+{
+}
+
 void
 sgen_memgov_collection_end (int generation, GGTimingInfo* info, int info_count)
 {
@@ -224,18 +216,6 @@ sgen_memgov_collection_end (int generation, GGTimingInfo* info, int info_count)
        }
 }
 
-void
-sgen_register_major_sections_alloced (size_t num_sections)
-{
-       minor_collection_sections_alloced += num_sections;
-}
-
-mword
-sgen_get_minor_collection_allowance (void)
-{
-       return minor_collection_allowance;
-}
-
 /* Memory pressure API */
 
 /* Negative value to remove */
@@ -356,7 +336,7 @@ gboolean
 sgen_memgov_try_alloc_space (mword size, int space)
 {
        if (sgen_memgov_available_free_space () < size) {
-               SGEN_ASSERT (4, !sgen_is_worker_thread (mono_native_thread_id_get ()), "Memory shouldn't run out in worker thread");
+               SGEN_ASSERT (4, !sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "Memory shouldn't run out in worker thread");
                return FALSE;
        }
 
@@ -372,7 +352,7 @@ sgen_memgov_init (size_t max_heap, size_t soft_limit, gboolean debug_allowance,
                soft_heap_limit = soft_limit;
 
        debug_print_allowance = debug_allowance;
-       minor_collection_allowance = MIN_MINOR_COLLECTION_ALLOWANCE;
+       major_collection_trigger_size = MIN_MINOR_COLLECTION_ALLOWANCE;
 
        mono_counters_register ("Memgov alloc", MONO_COUNTER_GC | MONO_COUNTER_WORD | MONO_COUNTER_BYTES | MONO_COUNTER_VARIABLE, &total_alloc);
        mono_counters_register ("Memgov max alloc", MONO_COUNTER_GC | MONO_COUNTER_WORD | MONO_COUNTER_BYTES | MONO_COUNTER_MONOTONIC, &total_alloc_max);
@@ -396,7 +376,6 @@ sgen_memgov_init (size_t max_heap, size_t soft_limit, gboolean debug_allowance,
 
        if (save_target)
                save_target_ratio = save_target;
-       minor_collection_allowance = MIN_MINOR_COLLECTION_ALLOWANCE;
 }
 
 #endif
index 16b9ac358a98265151aabe106668e3a56c594804..0115ec6e0605e2755c4b28dc8c020b89f8ea9466 100644 (file)
@@ -34,13 +34,11 @@ void sgen_memgov_minor_collection_start (void);
 void sgen_memgov_minor_collection_end (void);
 
 void sgen_memgov_major_collection_start (void);
-void sgen_memgov_major_collection_end (void);
+void sgen_memgov_major_collection_end (gboolean forced);
 
 void sgen_memgov_collection_start (int generation);
 void sgen_memgov_collection_end (int generation, GGTimingInfo* info, int info_count);
 
-void sgen_register_major_sections_alloced (size_t num_sections);
-mword sgen_get_minor_collection_allowance (void);
 gboolean sgen_need_major_collection (mword space_needed);
 
 
index 7d599e20908e59e6559b0e1bafbaa41e01e76f4f..403d04c3d9a56f95560c27f08f8b55e2d0a0bdd9 100644 (file)
@@ -31,6 +31,7 @@
 #include "metadata/sgen-gc.h"
 #include "metadata/sgen-archdep.h"
 #include "metadata/sgen-protocol.h"
+#include "metadata/sgen-thread-pool.h"
 #include "metadata/object-internals.h"
 #include "metadata/gc-internal.h"
 
@@ -116,7 +117,7 @@ sgen_thread_handshake (BOOL suspend)
 
        cur_thread->suspend_done = TRUE;
        FOREACH_THREAD_SAFE (info) {
-               if (info == cur_thread || sgen_is_worker_thread (mono_thread_info_get_tid (info)))
+               if (info == cur_thread || sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info)))
                        continue;
 
                info->suspend_done = FALSE;
index e201e12751b3eb9b427a28262af9970ef84a4c8b..13cb5ee54134df4554e6b3d9593a3c93448bf589 100644 (file)
@@ -51,10 +51,16 @@ realloc_queue (SgenPointerQueue *queue)
        SGEN_LOG (4, "Reallocated pointer queue to size: %lu", new_size);
 }
 
+gboolean
+sgen_pointer_queue_will_grow (SgenPointerQueue *queue)
+{
+       return queue->next_slot >= queue->size;
+}
+
 void
 sgen_pointer_queue_add (SgenPointerQueue *queue, void *ptr)
 {
-       if (queue->next_slot >= queue->size)
+       if (sgen_pointer_queue_will_grow (queue))
                realloc_queue (queue);
 
        queue->data [queue->next_slot++] = ptr;
index 303562e974cf4219f706bb190b0906fd1b6b7698..2f8b4cc3874ea3585566402e1653790a77c24bad 100644 (file)
@@ -39,5 +39,6 @@ void sgen_pointer_queue_init (SgenPointerQueue *queue, int mem_type);
 void* sgen_pointer_queue_pop (SgenPointerQueue *queue);
 gboolean sgen_pointer_queue_is_empty (SgenPointerQueue *queue);
 void sgen_pointer_queue_free (SgenPointerQueue *queue);
+gboolean sgen_pointer_queue_will_grow (SgenPointerQueue *queue);
 
 #endif
index bd449e10dfc12e7e41d30ada3a1225f9e2deed14..f9196ffe5b99a11a9899f6be2eb335f24387ec30 100644 (file)
@@ -71,6 +71,27 @@ MATCH_INDEX (BINARY_PROTOCOL_MATCH)
 IS_VTABLE_MATCH (FALSE)
 END_PROTOCOL_ENTRY
 
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_block_alloc, TYPE_POINTER, addr, TYPE_SIZE, size)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (FALSE)
+MATCH_INDEX (matches_interval (ptr, entry->addr, entry->size) ? 0 : BINARY_PROTOCOL_NO_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY_HEAVY
+
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_block_free, TYPE_POINTER, addr, TYPE_SIZE, size)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (FALSE)
+MATCH_INDEX (matches_interval (ptr, entry->addr, entry->size) ? 0 : BINARY_PROTOCOL_NO_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY_HEAVY
+
+BEGIN_PROTOCOL_ENTRY_HEAVY4 (binary_protocol_block_set_state, TYPE_POINTER, addr, TYPE_SIZE, size, TYPE_INT, old, TYPE_INT, new)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (FALSE)
+MATCH_INDEX (matches_interval (ptr, entry->addr, entry->size) ? 0 : BINARY_PROTOCOL_NO_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY_HEAVY
+
 BEGIN_PROTOCOL_ENTRY_HEAVY3 (binary_protocol_alloc, TYPE_POINTER, obj, TYPE_POINTER, vtable, TYPE_INT, size)
 DEFAULT_PRINT ()
 IS_ALWAYS_MATCH (FALSE)
index 22f70c3041784664a3658c7ba4b4a282b7c67c70..bbabb8ee36d78d9a48b9b897173eec52003ace2e 100644 (file)
@@ -27,6 +27,7 @@
 #include "sgen-gc.h"
 #include "sgen-protocol.h"
 #include "sgen-memory-governor.h"
+#include "sgen-thread-pool.h"
 #include "utils/mono-mmap.h"
 #include "utils/mono-threads.h"
 
@@ -293,7 +294,7 @@ protocol_entry (unsigned char type, gpointer data, int size)
        if (binary_protocol_file == -1)
                return;
 
-       if (sgen_is_worker_thread (mono_native_thread_id_get ()))
+       if (sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()))
                type |= 0x80;
 
        lock_recursive ();
index b994e0bdf489bfe41fa7fa5a2a030ee1cf5564af..9078f6bb07a1256f3c7c4d7810944b67b2ce7c5d 100644 (file)
 #include "metadata/sgen-gc.h"
 #include "metadata/sgen-protocol.h"
 #include "metadata/sgen-memory-governor.h"
+#include "metadata/sgen-thread-pool.h"
 #include "metadata/profiler-private.h"
 #include "utils/mono-time.h"
 #include "utils/dtrace.h"
 #include "utils/mono-counters.h"
 #include "utils/mono-threads.h"
 
+static gboolean world_is_stopped = FALSE;
+
 #define TV_DECLARE SGEN_TV_DECLARE
 #define TV_GETTIME SGEN_TV_GETTIME
 #define TV_ELAPSED SGEN_TV_ELAPSED
@@ -216,6 +219,8 @@ sgen_stop_world (int generation)
        TV_DECLARE (end_handshake);
        int count, dead;
 
+       SGEN_ASSERT (0, !world_is_stopped, "Why are we stopping a stopped world?");
+
        mono_profiler_gc_event (MONO_GC_EVENT_PRE_STOP_WORLD, generation);
        MONO_GC_WORLD_STOP_BEGIN ();
        binary_protocol_world_stopping (sgen_timestamp ());
@@ -240,6 +245,8 @@ sgen_stop_world (int generation)
                count -= dead;
        }
 
+       world_is_stopped = TRUE;
+
        SGEN_LOG (3, "world stopped %d thread(s)", count);
        mono_profiler_gc_event (MONO_GC_EVENT_POST_STOP_WORLD, generation);
        MONO_GC_WORLD_STOP_END ();
@@ -271,6 +278,8 @@ sgen_restart_world (int generation, GGTimingInfo *timing)
        TV_DECLARE (end_bridge);
        unsigned long usec, bridge_usec;
 
+       SGEN_ASSERT (0, world_is_stopped, "Why are we restarting a running world?");
+
        if (binary_protocol_is_enabled ()) {
                long long major_total = -1, major_marked = -1, los_total = -1, los_marked = -1;
                if (binary_protocol_is_heavy_enabled ())
@@ -305,6 +314,9 @@ sgen_restart_world (int generation, GGTimingInfo *timing)
        time_restart_world += TV_ELAPSED (start_handshake, end_sw);
        usec = TV_ELAPSED (stop_world_time, end_sw);
        max_pause_usec = MAX (usec, max_pause_usec);
+
+       world_is_stopped = FALSE;
+
        SGEN_LOG (2, "restarted %d thread(s) (pause time: %d usec, max: %d)", count, (int)usec, (int)max_pause_usec);
        mono_profiler_gc_event (MONO_GC_EVENT_POST_START_WORLD, generation);
        MONO_GC_WORLD_RESTART_END (generation);
@@ -340,6 +352,12 @@ sgen_restart_world (int generation, GGTimingInfo *timing)
        return count;
 }
 
+gboolean
+sgen_is_world_stopped (void)
+{
+       return world_is_stopped;
+}
+
 void
 sgen_init_stw (void)
 {
@@ -380,7 +398,7 @@ sgen_is_thread_in_current_stw (SgenThreadInfo *info)
        We can't suspend the workers that will do all the heavy lifting.
        FIXME Use some state bit in SgenThreadInfo for this.
        */
-       if (sgen_is_worker_thread (mono_thread_info_get_tid (info))) {
+       if (sgen_thread_pool_is_thread_pool_thread (mono_thread_info_get_tid (info))) {
                return FALSE;
        }
 
index 3d63e94bfcd89bb3678b5c5bbae166e594a5ad34..2d55abbbcc19839bf2e6348f6a683ffd6c5c123a 100644 (file)
@@ -37,6 +37,7 @@
 #define SGEN_POINTER_TAG_4(p)          ((void*)((mword)(p) | 4))
 #define SGEN_POINTER_UNTAG_4(p)                ((void*)((mword)(p) & ~4))
 
+#define SGEN_POINTER_UNTAG_12(p)       ((void*)((mword)(p) & ~3))
 #define SGEN_POINTER_UNTAG_24(p)       ((void*)((mword)(p) & ~6))
 
 #define SGEN_POINTER_IS_TAGGED_ANY(p)  ((mword)(p) & SGEN_TAGGED_POINTER_MASK)
diff --git a/mono/metadata/sgen-thread-pool.c b/mono/metadata/sgen-thread-pool.c
new file mode 100644 (file)
index 0000000..71d8d70
--- /dev/null
@@ -0,0 +1,245 @@
+/*
+ * sgen-thread-pool.c: Threadpool for all concurrent GC work.
+ *
+ * Copyright (C) 2015 Xamarin Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License 2.0 as published by the Free Software Foundation;
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License 2.0 along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "config.h"
+#ifdef HAVE_SGEN_GC
+
+#include "mono/metadata/sgen-gc.h"
+#include "mono/metadata/sgen-thread-pool.h"
+#include "mono/metadata/sgen-pointer-queue.h"
+#include "mono/utils/mono-mutex.h"
+#include "mono/utils/mono-threads.h"
+
+static mono_mutex_t lock;
+static mono_cond_t work_cond;
+static mono_cond_t done_cond;
+
+static MonoNativeThreadId thread;
+
+/* job_queue is only accessed with the lock held; idle_working is volatile and also read without the lock on fast paths. */
+static SgenPointerQueue job_queue;
+static volatile gboolean idle_working;
+
+static SgenThreadPoolThreadInitFunc thread_init_func;
+static SgenThreadPoolIdleJobFunc idle_job_func;
+
+enum {
+       STATE_WAITING,
+       STATE_IN_PROGRESS,
+       STATE_DONE
+};
+
+/* Assumes that the lock is held. */
+static SgenThreadPoolJob*
+get_job_and_set_in_progress (void)
+{
+       for (size_t i = 0; i < job_queue.next_slot; ++i) {
+               SgenThreadPoolJob *job = job_queue.data [i];
+               if (job->state == STATE_WAITING) {
+                       job->state = STATE_IN_PROGRESS;
+                       return job;
+               }
+       }
+       return NULL;
+}
+
+/* Assumes that the lock is held. */
+static ssize_t
+find_job_in_queue (SgenThreadPoolJob *job)
+{
+       for (ssize_t i = 0; i < job_queue.next_slot; ++i) {
+               if (job_queue.data [i] == job)
+                       return i;
+       }
+       return -1;
+}
+
+/* Assumes that the lock is held. */
+static void
+remove_job (SgenThreadPoolJob *job)
+{
+       ssize_t index;
+       SGEN_ASSERT (0, job->state == STATE_DONE, "Why are we removing a job that's not done?");
+       index = find_job_in_queue (job);
+       SGEN_ASSERT (0, index >= 0, "Why is the job we're trying to remove not in the queue?");
+       job_queue.data [index] = NULL;
+       sgen_pointer_queue_remove_nulls (&job_queue);
+       sgen_thread_pool_job_free (job);
+}
+
+static mono_native_thread_return_t
+thread_func (void *thread_data)
+{
+       thread_init_func (thread_data);
+
+       mono_mutex_lock (&lock);
+       for (;;) {
+               SgenThreadPoolJob *job;
+               gboolean do_idle = idle_working;
+
+               job = get_job_and_set_in_progress ();
+               if (!job && !do_idle) {
+                       mono_cond_wait (&work_cond, &lock);
+                       do_idle = idle_working;
+                       job = get_job_and_set_in_progress ();
+               }
+
+               mono_mutex_unlock (&lock);
+
+               if (job) {
+                       job->func (thread_data, job);
+
+                       mono_mutex_lock (&lock);
+
+                       SGEN_ASSERT (0, job->state == STATE_IN_PROGRESS, "The job should still be in progress.");
+                       job->state = STATE_DONE;
+                       remove_job (job);
+                       /*
+                        * Only the main GC thread will ever wait on the done condition, so we don't
+                        * have to broadcast.
+                        */
+                       mono_cond_signal (&done_cond);
+               } else {
+                       SGEN_ASSERT (0, do_idle, "Why did we unlock if we still have to wait for idle?");
+                       SGEN_ASSERT (0, idle_job_func, "Why do we have idle work when there's no idle job function?");
+                       do {
+                               do_idle = idle_job_func (thread_data);
+                       } while (do_idle && !job_queue.next_slot);
+
+                       mono_mutex_lock (&lock);
+
+                       if (!do_idle) {
+                               idle_working = FALSE;
+                               mono_cond_signal (&done_cond);
+                       }
+               }
+       }
+}
+
+void
+sgen_thread_pool_init (int num_threads, SgenThreadPoolThreadInitFunc init_func, SgenThreadPoolIdleJobFunc idle_func, void **thread_datas)
+{
+       SGEN_ASSERT (0, num_threads == 1, "We only support 1 thread pool thread for now.");
+
+       mono_mutex_init (&lock);
+       mono_cond_init (&work_cond, NULL);
+       mono_cond_init (&done_cond, NULL);
+
+       thread_init_func = init_func;
+       idle_job_func = idle_func;
+       idle_working = idle_func != NULL;
+
+       mono_native_thread_create (&thread, thread_func, thread_datas ? thread_datas [0] : NULL);
+}
+
+SgenThreadPoolJob*
+sgen_thread_pool_job_alloc (const char *name, SgenThreadPoolJobFunc func, size_t size)
+{
+       SgenThreadPoolJob *job = sgen_alloc_internal_dynamic (size, INTERNAL_MEM_THREAD_POOL_JOB, TRUE);
+       job->name = name;
+       job->size = size;
+       job->state = STATE_WAITING;
+       job->func = func;
+       return job;
+}
+
+void
+sgen_thread_pool_job_free (SgenThreadPoolJob *job)
+{
+       sgen_free_internal_dynamic (job, job->size, INTERNAL_MEM_THREAD_POOL_JOB);
+}
+
+void
+sgen_thread_pool_job_enqueue (SgenThreadPoolJob *job)
+{
+       mono_mutex_lock (&lock);
+
+       sgen_pointer_queue_add (&job_queue, job);
+       /*
+        * FIXME: We could check whether there is a job in progress.  If there is, there's
+        * no need to signal the condition, at least as long as we have only one thread.
+        */
+       mono_cond_signal (&work_cond);
+
+       mono_mutex_unlock (&lock);
+}
+
+void
+sgen_thread_pool_job_wait (SgenThreadPoolJob *job)
+{
+       SGEN_ASSERT (0, job, "Where's the job?");
+
+       mono_mutex_lock (&lock);
+
+       while (find_job_in_queue (job) >= 0)
+               mono_cond_wait (&done_cond, &lock);
+
+       mono_mutex_unlock (&lock);
+}
+
+void
+sgen_thread_pool_idle_signal (void)
+{
+       SGEN_ASSERT (0, idle_job_func, "Why are we signaling idle without an idle function?");
+
+       if (idle_working)
+               return;
+
+       mono_mutex_lock (&lock);
+
+       idle_working = TRUE;
+       mono_cond_signal (&work_cond);
+
+       mono_mutex_unlock (&lock);
+}
+
+void
+sgen_thread_pool_idle_wait (void)
+{
+       SGEN_ASSERT (0, idle_job_func, "Why are we waiting for idle without an idle function?");
+
+       if (!idle_working)
+               return;
+
+       mono_mutex_lock (&lock);
+
+       while (idle_working)
+               mono_cond_wait (&done_cond, &lock);
+
+       mono_mutex_unlock (&lock);
+}
+
+void
+sgen_thread_pool_wait_for_all_jobs (void)
+{
+       mono_mutex_lock (&lock);
+
+       while (!sgen_pointer_queue_is_empty (&job_queue))
+               mono_cond_wait (&done_cond, &lock);
+
+       mono_mutex_unlock (&lock);
+}
+
+gboolean
+sgen_thread_pool_is_thread_pool_thread (MonoNativeThreadId some_thread)
+{
+       return some_thread == thread;
+}
+
+#endif
diff --git a/mono/metadata/sgen-thread-pool.h b/mono/metadata/sgen-thread-pool.h
new file mode 100644 (file)
index 0000000..e0673f0
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * sgen-thread-pool.h: Threadpool for all concurrent GC work.
+ *
+ * Copyright (C) 2015 Xamarin Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License 2.0 as published by the Free Software Foundation;
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License 2.0 along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __MONO_SGEN_THREAD_POOL_H__
+#define __MONO_SGEN_THREAD_POOL_H__
+
+typedef struct _SgenThreadPoolJob SgenThreadPoolJob;
+
+typedef void (*SgenThreadPoolJobFunc) (void *thread_data, SgenThreadPoolJob *job);
+
+struct _SgenThreadPoolJob {
+       const char *name;
+       SgenThreadPoolJobFunc func;
+       size_t size;
+       volatile gint32 state;
+};
+
+typedef void (*SgenThreadPoolThreadInitFunc) (void*);
+typedef gboolean (*SgenThreadPoolIdleJobFunc) (void*);
+
+void sgen_thread_pool_init (int num_threads, SgenThreadPoolThreadInitFunc init_func, SgenThreadPoolIdleJobFunc idle_func, void **thread_datas);
+
+SgenThreadPoolJob* sgen_thread_pool_job_alloc (const char *name, SgenThreadPoolJobFunc func, size_t size);
+/* This only needs to be called on jobs that are not enqueued. */
+void sgen_thread_pool_job_free (SgenThreadPoolJob *job);
+
+void sgen_thread_pool_job_enqueue (SgenThreadPoolJob *job);
+/* This must only be called after the job has been enqueued. */
+void sgen_thread_pool_job_wait (SgenThreadPoolJob *job);
+
+void sgen_thread_pool_idle_signal (void);
+void sgen_thread_pool_idle_wait (void);
+
+void sgen_thread_pool_wait_for_all_jobs (void);
+
+gboolean sgen_thread_pool_is_thread_pool_thread (MonoNativeThreadId thread);
+
+#endif
index 9920845e468a1fd0c2ee4fad6989149b739ad8f4..1c68dfc52120f25f2e4c4adf191b148e4796281c 100644 (file)
 
 #include "metadata/sgen-gc.h"
 #include "metadata/sgen-workers.h"
+#include "metadata/sgen-thread-pool.h"
 #include "utils/mono-counters.h"
 
 static int workers_num;
 static WorkerData *workers_data;
-static void *workers_gc_thread_major_collector_data = NULL;
 
 static SgenSectionGrayQueue workers_distribute_gray_queue;
 static gboolean workers_distribute_gray_queue_inited;
 
-static gboolean workers_started = FALSE;
-
 enum {
        STATE_NOT_WORKING,
        STATE_WORKING,
        STATE_NURSERY_COLLECTION
 } WorkersStateName;
 
-/*
- * | state                    | num_awake | num_posted                 | post_done |
- * |--------------------------+-----------+----------------------------+-----------|
- * | STATE_NOT_WORKING        | 0         | *                          |         0 |
- * | STATE_WORKING            | > 0       | <= workers_num - num_awake |         * |
- * | STATE_NURSERY_COLLECTION | *         | <= workers_num - num_awake |         1 |
- * | STATE_NURSERY_COLLECTION | 0         | 0                          |         0 |
- */
-typedef union {
-       gint32 value;
-       struct {
-               guint state : 4; /* WorkersStateName */
-               /* Number of worker threads awake. */
-               guint num_awake : 8;
-               /* The state of the waiting semaphore. */
-               guint num_posted : 8;
-               /* Whether to post `workers_done_sem` */
-               guint post_done : 1;
-       } data;
-} State;
+typedef gint32 State;
 
 static volatile State workers_state;
 
-static MonoSemType workers_waiting_sem;
-static MonoSemType workers_done_sem;
-
-static volatile int workers_job_queue_num_entries = 0;
-static volatile JobQueueEntry *workers_job_queue = NULL;
-static LOCK_DECLARE (workers_job_queue_mutex);
-static int workers_num_jobs_enqueued = 0;
-static volatile int workers_num_jobs_finished = 0;
-
-static guint64 stat_workers_stolen_from_self_lock;
-static guint64 stat_workers_stolen_from_self_no_lock;
-static guint64 stat_workers_stolen_from_others;
-static guint64 stat_workers_num_waited;
+static guint64 stat_workers_num_finished;
 
 static gboolean
 set_state (State old_state, State new_state)
 {
-       if (old_state.data.state == STATE_NURSERY_COLLECTION)
-               SGEN_ASSERT (0, new_state.data.state != STATE_NOT_WORKING, "Can't go from nursery collection to not working");
+       if (old_state == STATE_NURSERY_COLLECTION)
+               SGEN_ASSERT (0, new_state != STATE_NOT_WORKING, "Can't go from nursery collection to not working");
 
-       return InterlockedCompareExchange (&workers_state.value,
-                       new_state.value, old_state.value) == old_state.value;
+       return InterlockedCompareExchange (&workers_state, new_state, old_state) == old_state;
 }
 
 static void
 assert_not_working (State state)
 {
-       SGEN_ASSERT (0, state.data.state == STATE_NOT_WORKING, "Can only signal enqueue work when in no work state");
-       SGEN_ASSERT (0, state.data.num_awake == 0, "No workers can be awake when not working");
-       SGEN_ASSERT (0, state.data.num_posted == 0, "Can't have posted already");
-       SGEN_ASSERT (0, !state.data.post_done, "post_done can only be set when working");
-
+       SGEN_ASSERT (0, state == STATE_NOT_WORKING, "Can only signal enqueue work when in no work state");
 }
 
 static void
-assert_working (State state, gboolean from_worker)
+assert_working (State state)
 {
-       SGEN_ASSERT (0, state.data.state == STATE_WORKING, "A worker can't wait without being in working state");
-       if (from_worker)
-               SGEN_ASSERT (0, state.data.num_awake > 0, "How can we be awake, yet we are not counted?");
-       else
-               SGEN_ASSERT (0, state.data.num_awake + state.data.num_posted > 0, "How can we be working, yet no worker threads are awake or to be awoken?");
-       SGEN_ASSERT (0, state.data.num_awake + state.data.num_posted <= workers_num, "There are too many worker threads awake");
+       SGEN_ASSERT (0, state == STATE_WORKING, "A worker can't wait without being in working state");
 }
 
 static void
-assert_nursery_collection (State state, gboolean from_worker)
+assert_nursery_collection (State state)
 {
-       SGEN_ASSERT (0, state.data.state == STATE_NURSERY_COLLECTION, "Must be in the nursery collection state");
-       if (from_worker) {
-               SGEN_ASSERT (0, state.data.num_awake > 0, "We're awake, but num_awake is zero");
-               SGEN_ASSERT (0, state.data.post_done, "post_done must be set in the nursery collection state");
-       }
-       SGEN_ASSERT (0, state.data.num_awake <= workers_num, "There are too many worker threads awake");
-       if (!state.data.post_done) {
-               SGEN_ASSERT (0, state.data.num_awake == 0, "Once done has been posted no threads can be awake");
-               SGEN_ASSERT (0, state.data.num_posted == 0, "Once done has been posted no thread must be awoken");
-       }
+       SGEN_ASSERT (0, state == STATE_NURSERY_COLLECTION, "Must be in the nursery collection state");
 }
 
 static void
 assert_working_or_nursery_collection (State state)
 {
-       if (state.data.state == STATE_WORKING)
-               assert_working (state, TRUE);
-       else
-               assert_nursery_collection (state, TRUE);
+       if (state != STATE_WORKING)
+               assert_nursery_collection (state);
 }
 
 static void
-workers_signal_enqueue_work (int num_wake_up, gboolean from_nursery_collection)
+workers_signal_enqueue_work (gboolean from_nursery_collection)
 {
        State old_state = workers_state;
-       State new_state = old_state;
-       int i;
        gboolean did_set_state;
 
-       SGEN_ASSERT (0, num_wake_up <= workers_num, "Cannot wake up more workers than are present");
-
        if (from_nursery_collection)
-               assert_nursery_collection (old_state, FALSE);
+               assert_nursery_collection (old_state);
        else
                assert_not_working (old_state);
 
-       new_state.data.state = STATE_WORKING;
-       new_state.data.num_posted = num_wake_up;
-
-       did_set_state = set_state (old_state, new_state);
+       did_set_state = set_state (old_state, STATE_WORKING);
        SGEN_ASSERT (0, did_set_state, "Nobody else should be mutating the state");
 
-       for (i = 0; i < num_wake_up; ++i)
-               MONO_SEM_POST (&workers_waiting_sem);
+       sgen_thread_pool_idle_signal ();
 }
 
 static void
-workers_signal_enqueue_work_if_necessary (int num_wake_up)
+workers_signal_enqueue_work_if_necessary (void)
 {
-       if (workers_state.data.state == STATE_NOT_WORKING)
-               workers_signal_enqueue_work (num_wake_up, FALSE);
+       if (workers_state == STATE_NOT_WORKING)
+               workers_signal_enqueue_work (FALSE);
 }
 
 void
 sgen_workers_ensure_awake (void)
 {
-       SGEN_ASSERT (0, workers_state.data.state != STATE_NURSERY_COLLECTION, "Can't wake workers during nursery collection");
-       workers_signal_enqueue_work_if_necessary (workers_num);
+       SGEN_ASSERT (0, workers_state != STATE_NURSERY_COLLECTION, "Can't wake workers during nursery collection");
+       workers_signal_enqueue_work_if_necessary ();
 }
 
 static void
-workers_wait (void)
+worker_finish (void)
 {
-       State old_state, new_state;
-       gboolean post_done;
+       State old_state;
 
-       ++stat_workers_num_waited;
+       ++stat_workers_num_finished;
 
        do {
-               new_state = old_state = workers_state;
+               old_state = workers_state;
 
                assert_working_or_nursery_collection (old_state);
+               if (old_state == STATE_NURSERY_COLLECTION)
+                       return;
 
-               --new_state.data.num_awake;
-               post_done = FALSE;
-               if (!new_state.data.num_awake && !new_state.data.num_posted) {
-                       /* We are the last thread to go to sleep. */
-                       if (old_state.data.state == STATE_WORKING)
-                               new_state.data.state = STATE_NOT_WORKING;
-
-                       new_state.data.post_done = 0;
-                       if (old_state.data.post_done)
-                               post_done = TRUE;
-               }
-       } while (!set_state (old_state, new_state));
-
-       if (post_done)
-               MONO_SEM_POST (&workers_done_sem);
-
-       MONO_SEM_WAIT (&workers_waiting_sem);
-
-       do {
-               new_state = old_state = workers_state;
-
-               SGEN_ASSERT (0, old_state.data.num_posted > 0, "How can we be awake without the semaphore having been posted?");
-               SGEN_ASSERT (0, old_state.data.num_awake < workers_num, "There are too many worker threads awake");
-
-               --new_state.data.num_posted;
-               ++new_state.data.num_awake;
-
-               assert_working_or_nursery_collection (new_state);
-       } while (!set_state (old_state, new_state));
+               /* Transition the pool to not-working; retry if the state changed concurrently. */
+       } while (!set_state (old_state, STATE_NOT_WORKING));
 }
 
 static gboolean
@@ -223,117 +135,45 @@ collection_needs_workers (void)
 }
 
 void
-sgen_workers_enqueue_job (const char *name, JobFunc func, void *data)
+sgen_workers_enqueue_job (SgenThreadPoolJob *job)
 {
-       int num_entries;
-       JobQueueEntry *entry;
-
        if (!collection_needs_workers ()) {
-               func (NULL, data);
+               job->func (NULL, job);
+               sgen_thread_pool_job_free (job);
                return;
        }
 
-       entry = sgen_alloc_internal (INTERNAL_MEM_JOB_QUEUE_ENTRY);
-       entry->name = name;
-       entry->func = func;
-       entry->data = data;
-
-       mono_mutex_lock (&workers_job_queue_mutex);
-       entry->next = workers_job_queue;
-       workers_job_queue = entry;
-       num_entries = ++workers_job_queue_num_entries;
-       ++workers_num_jobs_enqueued;
-       mono_mutex_unlock (&workers_job_queue_mutex);
-
-       if (workers_state.data.state != STATE_NURSERY_COLLECTION)
-               workers_signal_enqueue_work_if_necessary (num_entries < workers_num ? num_entries : workers_num);
+       sgen_thread_pool_job_enqueue (job);
 }
 
 void
 sgen_workers_wait_for_jobs_finished (void)
 {
-       // FIXME: implement this properly
-       while (workers_num_jobs_finished < workers_num_jobs_enqueued) {
-               workers_signal_enqueue_work_if_necessary (workers_num);
-               /* FIXME: sleep less? */
-               g_usleep (1000);
-       }
+       sgen_thread_pool_wait_for_all_jobs ();
 }
 
 void
 sgen_workers_signal_start_nursery_collection_and_wait (void)
 {
-       State old_state, new_state;
+       State old_state;
 
        do {
-               new_state = old_state = workers_state;
-
-               new_state.data.state = STATE_NURSERY_COLLECTION;
-
-               if (old_state.data.state == STATE_NOT_WORKING) {
-                       assert_not_working (old_state);
-               } else {
-                       assert_working (old_state, FALSE);
-                       SGEN_ASSERT (0, !old_state.data.post_done, "We are not waiting for the workers");
+               old_state = workers_state;
 
-                       new_state.data.post_done = 1;
-               }
-       } while (!set_state (old_state, new_state));
+               if (old_state != STATE_NOT_WORKING)
+                       assert_working (old_state);
+       } while (!set_state (old_state, STATE_NURSERY_COLLECTION));
 
-       if (new_state.data.post_done)
-               MONO_SEM_WAIT (&workers_done_sem);
+       sgen_thread_pool_idle_wait ();
 
-       old_state = workers_state;
-       assert_nursery_collection (old_state, FALSE);
-       SGEN_ASSERT (0, !old_state.data.post_done, "We got the semaphore, so it must have been posted");
+       assert_nursery_collection (workers_state);
 }
 
 void
 sgen_workers_signal_finish_nursery_collection (void)
 {
-       State old_state = workers_state;
-
-       assert_nursery_collection (old_state, FALSE);
-       SGEN_ASSERT (0, !old_state.data.post_done, "We are finishing the nursery collection, so we should have waited for the semaphore earlier");
-
-       workers_signal_enqueue_work (workers_num, TRUE);
-}
-
-static gboolean
-workers_dequeue_and_do_job (WorkerData *data)
-{
-       JobQueueEntry *entry;
-
-       /*
-        * At this point the GC might not be running anymore.  We
-        * could have been woken up by a job that was then taken by
-        * another thread, after which the collection finished, so we
-        * first have to successfully dequeue a job before doing
-        * anything assuming that the collection is still ongoing.
-        */
-
-       if (!workers_job_queue_num_entries)
-               return FALSE;
-
-       mono_mutex_lock (&workers_job_queue_mutex);
-       entry = (JobQueueEntry*)workers_job_queue;
-       if (entry) {
-               workers_job_queue = entry->next;
-               --workers_job_queue_num_entries;
-       }
-       mono_mutex_unlock (&workers_job_queue_mutex);
-
-       if (!entry)
-               return FALSE;
-
-       g_assert (collection_needs_workers ());
-
-       entry->func (data, entry->data);
-       sgen_free_internal (entry, INTERNAL_MEM_JOB_QUEUE_ENTRY);
-
-       SGEN_ATOMIC_ADD (workers_num_jobs_finished, 1);
-
-       return TRUE;
+       assert_nursery_collection (workers_state);
+       workers_signal_enqueue_work (TRUE);
 }
 
 static gboolean
@@ -373,66 +213,59 @@ init_private_gray_queue (WorkerData *data)
                        sgen_get_major_collector ()->is_concurrent ? concurrent_enqueue_check : NULL);
 }
 
-static mono_native_thread_return_t
-workers_thread_func (void *data_untyped)
+static void
+thread_pool_init_func (void *data_untyped)
 {
        WorkerData *data = data_untyped;
        SgenMajorCollector *major = sgen_get_major_collector ();
 
        mono_thread_info_register_small_id ();
 
-       if (major->init_worker_thread)
-               major->init_worker_thread (data->major_collector_data);
+       if (!major->is_concurrent)
+               return;
 
        init_private_gray_queue (data);
+}
 
-       for (;;) {
-               gboolean did_work = FALSE;
-
-               SGEN_ASSERT (0, sgen_get_current_collection_generation () != GENERATION_NURSERY, "Why are we doing work while there's a nursery collection happening?");
-
-               while (workers_state.data.state == STATE_WORKING && workers_dequeue_and_do_job (data)) {
-                       did_work = TRUE;
-                       /* FIXME: maybe distribute the gray queue here? */
-               }
+static gboolean
+marker_idle_func (void *data_untyped)
+{
+       WorkerData *data = data_untyped;
+       SgenMajorCollector *major = sgen_get_major_collector ();
 
-               if (!did_work && (!sgen_gray_object_queue_is_empty (&data->private_gray_queue) || workers_get_work (data))) {
-                       SgenObjectOperations *ops = sgen_concurrent_collection_in_progress ()
-                               ? &major->major_concurrent_ops
-                               : &major->major_ops;
-                       ScanCopyContext ctx = { ops->scan_object, NULL, &data->private_gray_queue };
+       if (workers_state != STATE_WORKING)
+               return FALSE;
 
-                       g_assert (!sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+       SGEN_ASSERT (0, sgen_get_current_collection_generation () != GENERATION_NURSERY, "Why are we doing work while there's a nursery collection happening?");
 
-                       while (!sgen_drain_gray_stack (32, ctx)) {
-                               if (workers_state.data.state == STATE_NURSERY_COLLECTION)
-                                       workers_wait ();
-                       }
-                       g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+       if (!sgen_gray_object_queue_is_empty (&data->private_gray_queue) || workers_get_work (data)) {
+               SgenObjectOperations *ops = sgen_concurrent_collection_in_progress ()
+                       ? &major->major_concurrent_ops
+                       : &major->major_ops;
+               ScanCopyContext ctx = { ops->scan_object, NULL, &data->private_gray_queue };
 
-                       init_private_gray_queue (data);
+               SGEN_ASSERT (0, !sgen_gray_object_queue_is_empty (&data->private_gray_queue), "How is our gray queue empty if we just got work?");
 
-                       did_work = TRUE;
-               }
+               sgen_drain_gray_stack (32, ctx);
 
-               if (!did_work)
-                       workers_wait ();
+               return TRUE;
        }
 
-       /* dummy return to make compilers happy */
-       return NULL;
+       worker_finish ();
+
+       return FALSE;
 }
 
 static void
-init_distribute_gray_queue (gboolean locked)
+init_distribute_gray_queue (void)
 {
        if (workers_distribute_gray_queue_inited) {
                g_assert (sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue));
-               g_assert (!workers_distribute_gray_queue.locked == !locked);
+               g_assert (workers_distribute_gray_queue.locked);
                return;
        }
 
-       sgen_section_gray_queue_init (&workers_distribute_gray_queue, locked,
+       sgen_section_gray_queue_init (&workers_distribute_gray_queue, TRUE,
                        sgen_get_major_collector ()->is_concurrent ? concurrent_enqueue_check : NULL);
        workers_distribute_gray_queue_inited = TRUE;
 }
@@ -440,19 +273,21 @@ init_distribute_gray_queue (gboolean locked)
 void
 sgen_workers_init_distribute_gray_queue (void)
 {
-       if (!collection_needs_workers ())
-               return;
-
-       init_distribute_gray_queue (sgen_get_major_collector ()->is_concurrent);
+       SGEN_ASSERT (0, sgen_get_major_collector ()->is_concurrent && collection_needs_workers (),
+                       "Why should we init the distribute gray queue if we don't need it?");
+       init_distribute_gray_queue ();
 }
 
 void
 sgen_workers_init (int num_workers)
 {
        int i;
+       void *workers_data_ptrs [num_workers];
 
-       if (!sgen_get_major_collector ()->is_concurrent)
+       if (!sgen_get_major_collector ()->is_concurrent) {
+               sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL);
                return;
+       }
 
        //g_print ("initing %d workers\n", num_workers);
 
@@ -461,111 +296,39 @@ sgen_workers_init (int num_workers)
        workers_data = sgen_alloc_internal_dynamic (sizeof (WorkerData) * num_workers, INTERNAL_MEM_WORKER_DATA, TRUE);
        memset (workers_data, 0, sizeof (WorkerData) * num_workers);
 
-       MONO_SEM_INIT (&workers_waiting_sem, 0);
-       MONO_SEM_INIT (&workers_done_sem, 0);
-
-       init_distribute_gray_queue (sgen_get_major_collector ()->is_concurrent);
-
-       if (sgen_get_major_collector ()->alloc_worker_data)
-               workers_gc_thread_major_collector_data = sgen_get_major_collector ()->alloc_worker_data ();
-
-       for (i = 0; i < workers_num; ++i) {
-               workers_data [i].index = i;
-
-               if (sgen_get_major_collector ()->alloc_worker_data)
-                       workers_data [i].major_collector_data = sgen_get_major_collector ()->alloc_worker_data ();
-       }
-
-       LOCK_INIT (workers_job_queue_mutex);
+       init_distribute_gray_queue ();
 
-       sgen_register_fixed_internal_mem_type (INTERNAL_MEM_JOB_QUEUE_ENTRY, sizeof (JobQueueEntry));
-
-       mono_counters_register ("Stolen from self lock", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_stolen_from_self_lock);
-       mono_counters_register ("Stolen from self no lock", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_stolen_from_self_no_lock);
-       mono_counters_register ("Stolen from others", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_stolen_from_others);
-       mono_counters_register ("# workers waited", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_num_waited);
-}
-
-/* only the GC thread is allowed to start and join workers */
+       for (i = 0; i < workers_num; ++i)
+               workers_data_ptrs [i] = &workers_data [i];
 
-static void
-workers_start_worker (int index)
-{
-       g_assert (index >= 0 && index < workers_num);
+       sgen_thread_pool_init (num_workers, thread_pool_init_func, marker_idle_func, workers_data_ptrs);
 
-       g_assert (!workers_data [index].thread);
-       mono_native_thread_create (&workers_data [index].thread, workers_thread_func, &workers_data [index]);
+       mono_counters_register ("# workers finished", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_num_finished);
 }
 
 void
 sgen_workers_start_all_workers (void)
 {
-       State old_state, new_state;
-       int i;
-       gboolean result;
-
        if (!collection_needs_workers ())
                return;
 
-       if (sgen_get_major_collector ()->init_worker_thread)
-               sgen_get_major_collector ()->init_worker_thread (workers_gc_thread_major_collector_data);
-
-       old_state = new_state = workers_state;
-       assert_not_working (old_state);
-
-       g_assert (workers_job_queue_num_entries == 0);
-       workers_num_jobs_enqueued = 0;
-       workers_num_jobs_finished = 0;
-
-       if (workers_started) {
-               workers_signal_enqueue_work (workers_num, FALSE);
-               return;
-       }
-
-       new_state.data.state = STATE_WORKING;
-       new_state.data.num_awake = workers_num;
-       result = set_state (old_state, new_state);
-       SGEN_ASSERT (0, result, "Nobody else should have modified the state - workers have not been started yet");
-
-       for (i = 0; i < workers_num; ++i)
-               workers_start_worker (i);
-
-       workers_started = TRUE;
-}
-
-gboolean
-sgen_workers_have_started (void)
-{
-       return workers_started;
+       workers_signal_enqueue_work (FALSE);
 }
 
 void
 sgen_workers_join (void)
 {
-       State old_state;
        int i;
 
        if (!collection_needs_workers ())
                return;
 
-       for (;;) {
-               old_state = workers_state;
-               SGEN_ASSERT (0, old_state.data.state != STATE_NURSERY_COLLECTION, "Can't be in nursery collection when joining");
-
-               if (old_state.data.state == STATE_WORKING) {
-                       State new_state = old_state;
-
-                       SGEN_ASSERT (0, !old_state.data.post_done, "Why is post_done already set?");
-                       new_state.data.post_done = 1;
-                       if (!set_state (old_state, new_state))
-                               continue;
-
-                       MONO_SEM_WAIT (&workers_done_sem);
+       sgen_thread_pool_wait_for_all_jobs ();
 
-                       old_state = workers_state;
-               }
-
-               assert_not_working (old_state);
+       for (;;) {
+               SGEN_ASSERT (0, workers_state != STATE_NURSERY_COLLECTION, "Can't be in nursery collection when joining");
+               sgen_thread_pool_idle_wait ();
+               assert_not_working (workers_state);
 
                /*
                 * Checking whether there is still work left and, if not, going to sleep,
@@ -573,20 +336,14 @@ sgen_workers_join (void)
                 * workers.  Therefore there's a race condition where work can be added
                 * after they've checked for work, and before they've gone to sleep.
                 */
-               if (!workers_job_queue_num_entries && sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue))
+               if (sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue))
                        break;
 
-               workers_signal_enqueue_work (workers_num, FALSE);
+               workers_signal_enqueue_work (FALSE);
        }
 
        /* At this point all the workers have stopped. */
 
-       if (sgen_get_major_collector ()->reset_worker_data) {
-               for (i = 0; i < workers_num; ++i)
-                       sgen_get_major_collector ()->reset_worker_data (workers_data [i].major_collector_data);
-       }
-
-       g_assert (workers_job_queue_num_entries == 0);
        g_assert (sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue));
        for (i = 0; i < workers_num; ++i)
                g_assert (sgen_gray_object_queue_is_empty (&workers_data [i].private_gray_queue));
@@ -595,29 +352,20 @@ sgen_workers_join (void)
 gboolean
 sgen_workers_all_done (void)
 {
-       return workers_state.data.state == STATE_NOT_WORKING;
+       return workers_state == STATE_NOT_WORKING;
 }
 
 gboolean
 sgen_workers_are_working (void)
 {
-       State state = workers_state;
-       return state.data.num_awake > 0 || state.data.num_posted > 0;
+       return workers_state == STATE_WORKING;
 }
 
-gboolean
-sgen_is_worker_thread (MonoNativeThreadId thread)
+void
+sgen_workers_wait (void)
 {
-       int i;
-
-       if (sgen_get_major_collector ()->is_worker_thread && sgen_get_major_collector ()->is_worker_thread (thread))
-               return TRUE;
-
-       for (i = 0; i < workers_num; ++i) {
-               if (workers_data [i].thread == thread)
-                       return TRUE;
-       }
-       return FALSE;
+       sgen_thread_pool_idle_wait ();
+       SGEN_ASSERT (0, sgen_workers_all_done (), "Why are the workers not done after we wait for them?");
 }
 
 SgenSectionGrayQueue*
@@ -626,10 +374,4 @@ sgen_workers_get_distribute_section_gray_queue (void)
        return &workers_distribute_gray_queue;
 }
 
-void
-sgen_workers_reset_data (void)
-{
-       if (sgen_get_major_collector ()->reset_worker_data)
-               sgen_get_major_collector ()->reset_worker_data (workers_gc_thread_major_collector_data);
-}
 #endif
index 5c509de612898c2bb40264c5862b615a46266427..7211b6551d920fac3b4f7984850f54319a194c18 100644 (file)
 #ifndef __MONO_SGEN_WORKER_H__
 #define __MONO_SGEN_WORKER_H__
 
+#include "mono/metadata/sgen-thread-pool.h"
+
 typedef struct _WorkerData WorkerData;
 struct _WorkerData {
-       int index;
-       MonoNativeThreadId thread;
-       void *major_collector_data;
-
        SgenGrayQueue private_gray_queue; /* only read/written by worker thread */
 };
 
-typedef void (*JobFunc) (WorkerData *worker_data, void *job_data);
-
-typedef struct _JobQueueEntry JobQueueEntry;
-struct _JobQueueEntry {
-       const char *name;
-       JobFunc func;
-       void *data;
-
-       volatile JobQueueEntry *next;
-};
-
 void sgen_workers_init (int num_workers);
 void sgen_workers_start_all_workers (void);
-gboolean sgen_workers_have_started (void);
 void sgen_workers_ensure_awake (void);
 void sgen_workers_init_distribute_gray_queue (void);
-void sgen_workers_enqueue_job (const char *name, JobFunc func, void *data);
+void sgen_workers_enqueue_job (SgenThreadPoolJob *job);
 void sgen_workers_wait_for_jobs_finished (void);
 void sgen_workers_distribute_gray_queue_sections (void);
 void sgen_workers_reset_data (void);
 void sgen_workers_join (void);
 gboolean sgen_workers_all_done (void);
 gboolean sgen_workers_are_working (void);
+void sgen_workers_wait (void);
 SgenSectionGrayQueue* sgen_workers_get_distribute_section_gray_queue (void);
 
 void sgen_workers_signal_start_nursery_collection_and_wait (void);