#include <errno.h>
#include <assert.h>
#include <stdlib.h>
+#include <glib.h>
#include "mono/sgen/sgen-gc.h"
#include "mono/sgen/sgen-cardtable.h"
#include <mono/utils/memcheck.h>
#include <mono/utils/mono-mmap-internals.h>
+#include <mono/utils/unlocked.h>
#undef pthread_create
#undef pthread_join
static guint64 time_major_pinning = 0;
static guint64 time_major_scan_pinned = 0;
static guint64 time_major_scan_roots = 0;
-static guint64 time_major_scan_mod_union = 0;
+static guint64 time_major_scan_mod_union_blocks = 0;
+static guint64 time_major_scan_mod_union_los = 0;
static guint64 time_major_finish_gray_stack = 0;
static guint64 time_major_free_bigobjs = 0;
static guint64 time_major_los_sweep = 0;
mono_counters_register ("Major pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_pinning);
mono_counters_register ("Major scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_pinned);
mono_counters_register ("Major scan roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_roots);
- mono_counters_register ("Major scan mod union", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union);
+ mono_counters_register ("Major scan mod union blocks", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union_blocks);
+ mono_counters_register ("Major scan mod union los", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union_los);
mono_counters_register ("Major finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_finish_gray_stack);
mono_counters_register ("Major free big objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_free_bigobjs);
mono_counters_register ("Major LOS sweep", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_los_sweep);
typedef struct {
ScanJob scan_job;
int job_index, job_split_count;
+ int data;
} ParallelScanJob;
static ScanCopyContext
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
SGEN_TV_GETTIME (atv);
- major_collector.scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count);
+ major_collector.scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count, job_data->data);
SGEN_TV_GETTIME (btv);
time_minor_scan_major_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count);
SGEN_TV_GETTIME (btv);
time_minor_scan_los += SGEN_TV_ELAPSED (atv, btv);
+
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
- major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count);
+ SGEN_TV_GETTIME (atv);
+ major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count, job_data->data);
+ SGEN_TV_GETTIME (btv);
+ time_major_scan_mod_union_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
+ SGEN_TV_GETTIME (atv);
sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count);
+ SGEN_TV_GETTIME (btv);
+ time_major_scan_mod_union_los += SGEN_TV_ELAPSED (atv, btv);
+
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
+ SGEN_TV_GETTIME (atv);
+ major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count, job_data->data);
+ SGEN_TV_GETTIME (btv);
- major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count);
+ g_assert (worker_data_untyped);
+ ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
-
+ SGEN_TV_GETTIME (atv);
sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count);
+ SGEN_TV_GETTIME (btv);
+
+ g_assert (worker_data_untyped);
+ ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
{
ParallelScanJob *psj;
ScanJob *sj;
+ size_t num_major_sections = major_collector.get_num_major_sections ();
int split_count = sgen_workers_get_job_split_count (GENERATION_OLD);
int i;
/* Mod union preclean jobs */
psj->scan_job.gc_thread_gray_queue = NULL;
psj->job_index = i;
psj->job_split_count = split_count;
+ psj->data = num_major_sections / split_count;
sgen_workers_enqueue_job (GENERATION_OLD, &psj->scan_job.job, TRUE);
}
enqueue_scan_remembered_set_jobs (SgenGrayQueue *gc_thread_gray_queue, SgenObjectOperations *ops, gboolean enqueue)
{
int i, split_count = sgen_workers_get_job_split_count (GENERATION_NURSERY);
+ size_t num_major_sections = major_collector.get_num_major_sections ();
ScanJob *sj;
sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan wbroots", job_scan_wbroots, sizeof (ScanJob));
psj->scan_job.gc_thread_gray_queue = gc_thread_gray_queue;
psj->job_index = i;
psj->job_split_count = split_count;
+ psj->data = num_major_sections / split_count;
sgen_workers_enqueue_job (GENERATION_NURSERY, &psj->scan_job.job, enqueue);
psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan LOS remsets", job_scan_los_card_table, sizeof (ParallelScanJob));
TV_DECLARE (btv);
SGEN_TV_DECLARE (last_minor_collection_start_tv);
SGEN_TV_DECLARE (last_minor_collection_end_tv);
+ guint64 major_scan_start = time_minor_scan_major_blocks;
+ guint64 los_scan_start = time_minor_scan_los;
+ guint64 finish_gray_start = time_minor_finish_gray_stack;
if (disable_minor_collections)
return TRUE;
TV_GETTIME (last_minor_collection_start_tv);
atv = last_minor_collection_start_tv;
- binary_protocol_collection_begin (gc_stats.minor_gc_count, GENERATION_NURSERY);
+ binary_protocol_collection_begin (InterlockedRead (&gc_stats.minor_gc_count), GENERATION_NURSERY);
object_ops_nopar = sgen_concurrent_collection_in_progress ()
? &sgen_minor_collector.serial_ops_with_concurrent_major
degraded_mode = 0;
objects_pinned = 0;
- SGEN_LOG (1, "Start nursery collection %d %p-%p, size: %d", gc_stats.minor_gc_count, nursery_section->data, nursery_section->end_data, (int)(nursery_section->end_data - nursery_section->data));
+ SGEN_LOG (1, "Start nursery collection %" G_GINT32_FORMAT " %p-%p, size: %d", InterlockedRead (&gc_stats.minor_gc_count), nursery_section->data, nursery_section->end_data, (int)(nursery_section->end_data - nursery_section->data));
/* world must be stopped already */
TV_GETTIME (btv);
init_gray_queue (&gc_thread_gray_queue);
ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, &gc_thread_gray_queue);
- gc_stats.minor_gc_count ++;
+ InterlockedIncrement (&gc_stats.minor_gc_count);
sgen_process_fin_stage_entries ();
major_collector.finish_nursery_collection ();
TV_GETTIME (last_minor_collection_end_tv);
- gc_stats.minor_gc_time += TV_ELAPSED (last_minor_collection_start_tv, last_minor_collection_end_tv);
+ UnlockedAdd64 (&gc_stats.minor_gc_time, TV_ELAPSED (last_minor_collection_start_tv, last_minor_collection_end_tv));
- sgen_debug_dump_heap ("minor", gc_stats.minor_gc_count - 1, NULL);
+ sgen_debug_dump_heap ("minor", InterlockedRead (&gc_stats.minor_gc_count) - 1, NULL);
/* prepare the pin queue for the next collection */
sgen_finish_pinning ();
current_collection_generation = -1;
objects_pinned = 0;
- binary_protocol_collection_end (gc_stats.minor_gc_count - 1, GENERATION_NURSERY, 0, 0);
+ if (is_parallel)
+ binary_protocol_collection_end_stats (0, 0, time_minor_finish_gray_stack - finish_gray_start);
+ else
+ binary_protocol_collection_end_stats (
+ time_minor_scan_major_blocks - major_scan_start,
+ time_minor_scan_los - los_scan_start,
+ time_minor_finish_gray_stack - finish_gray_start);
+
+ binary_protocol_collection_end (InterlockedRead (&gc_stats.minor_gc_count) - 1, GENERATION_NURSERY, 0, 0);
if (check_nursery_objects_pinned && !sgen_minor_collector.is_split)
sgen_check_nursery_objects_pinned (unpin_queue != NULL);
if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
if (object_ops_par != NULL)
sgen_workers_set_num_active_workers (GENERATION_OLD, 0);
- if (sgen_workers_have_idle_work (GENERATION_OLD)) {
+ if (object_ops_par == NULL && sgen_workers_have_idle_work (GENERATION_OLD)) {
/*
* We force the finish of the worker with the new object ops context
- * which can also do copying. We need to have finished pinning.
+ * which can also do copying. We need to have finished pinning. On the
+ * parallel collector, there is no need to drain the private queues
+ * here, since we can do it as part of the finishing work, achieving
+ * better work distribution.
*/
sgen_workers_start_all_workers (GENERATION_OLD, object_ops_nopar, object_ops_par, NULL);
if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
int i, split_count = sgen_workers_get_job_split_count (GENERATION_OLD);
+ size_t num_major_sections = major_collector.get_num_major_sections ();
gboolean parallel = object_ops_par != NULL;
/* If we're not parallel we finish the collection on the gc thread */
psj->scan_job.gc_thread_gray_queue = gc_thread_gray_queue;
psj->job_index = i;
psj->job_split_count = split_count;
+ psj->data = num_major_sections / split_count;
sgen_workers_enqueue_job (GENERATION_OLD, &psj->scan_job.job, parallel);
psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan LOS mod union cardtable", job_scan_los_mod_union_card_table, sizeof (ParallelScanJob));
{
SgenObjectOperations *object_ops_nopar, *object_ops_par = NULL;
- binary_protocol_collection_begin (gc_stats.major_gc_count, GENERATION_OLD);
+ binary_protocol_collection_begin (InterlockedRead (&gc_stats.major_gc_count), GENERATION_OLD);
current_collection_generation = GENERATION_OLD;
check_scan_starts ();
degraded_mode = 0;
- SGEN_LOG (1, "Start major collection %d", gc_stats.major_gc_count);
- gc_stats.major_gc_count ++;
+ SGEN_LOG (1, "Start major collection %" G_GINT32_FORMAT, InterlockedRead (&gc_stats.major_gc_count));
+ InterlockedIncrement (&gc_stats.major_gc_count);
if (major_collector.start_major_collection)
major_collector.start_major_collection ();
mword fragment_total;
TV_DECLARE (atv);
TV_DECLARE (btv);
-
- TV_GETTIME (btv);
+ guint64 major_scan_start = time_major_scan_mod_union_blocks;
+ guint64 los_scan_start = time_major_scan_mod_union_los;
+ guint64 finish_gray_start = time_major_finish_gray_stack;
if (concurrent_collection_in_progress) {
SgenObjectOperations *object_ops_par = NULL;
sgen_workers_assert_gray_queue_is_empty (GENERATION_OLD);
+ TV_GETTIME (btv);
finish_gray_stack (GENERATION_OLD, CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, gc_thread_gray_queue));
TV_GETTIME (atv);
time_major_finish_gray_stack += TV_ELAPSED (btv, atv);
TV_GETTIME (atv);
time_major_sweep += TV_ELAPSED (btv, atv);
- sgen_debug_dump_heap ("major", gc_stats.major_gc_count - 1, reason);
+ sgen_debug_dump_heap ("major", InterlockedRead (&gc_stats.major_gc_count) - 1, reason);
if (sgen_have_pending_finalizers ()) {
SGEN_LOG (4, "Finalizer-thread wakeup");
binary_protocol_flush_buffers (FALSE);
//consistency_check ();
+ if (major_collector.is_parallel)
+ binary_protocol_collection_end_stats (0, 0, time_major_finish_gray_stack - finish_gray_start);
+ else
+ binary_protocol_collection_end_stats (
+ time_major_scan_mod_union_blocks - major_scan_start,
+ time_major_scan_mod_union_los - los_scan_start,
+ time_major_finish_gray_stack - finish_gray_start);
- binary_protocol_collection_end (gc_stats.major_gc_count - 1, GENERATION_OLD, counts.num_scanned_objects, counts.num_unique_scanned_objects);
+ binary_protocol_collection_end (InterlockedRead (&gc_stats.major_gc_count) - 1, GENERATION_OLD, counts.num_scanned_objects, counts.num_unique_scanned_objects);
}
static gboolean
sgen_gray_object_queue_dispose (&gc_thread_gray_queue);
TV_GETTIME (time_end);
- gc_stats.major_gc_time += TV_ELAPSED (time_start, time_end);
+ UnlockedAdd64 (&gc_stats.major_gc_time, TV_ELAPSED (time_start, time_end));
/* FIXME: also report this to the user, preferably in gc-end. */
if (major_collector.get_and_reset_num_major_objects_marked)
num_objects_marked = major_collector.get_and_reset_num_major_objects_marked ();
TV_GETTIME (time_end);
- gc_stats.major_gc_time += TV_ELAPSED (time_start, time_end);
+ UnlockedAdd64 (&gc_stats.major_gc_time, TV_ELAPSED (time_start, time_end));
current_collection_generation = -1;
}
sgen_los_update_cardtable_mod_union ();
TV_GETTIME (total_end);
- gc_stats.major_gc_time += TV_ELAPSED (total_start, total_end);
+ UnlockedAdd64 (&gc_stats.major_gc_time, TV_ELAPSED (total_start, total_end));
}
static void
sgen_workers_stop_all_workers (GENERATION_OLD);
SGEN_TV_GETTIME (time_major_conc_collection_end);
- gc_stats.major_gc_time_concurrent += SGEN_TV_ELAPSED (time_major_conc_collection_start, time_major_conc_collection_end);
+ UnlockedAdd64 (&gc_stats.major_gc_time_concurrent, SGEN_TV_ELAPSED (time_major_conc_collection_start, time_major_conc_collection_end));
major_collector.update_cardtable_mod_union ();
sgen_los_update_cardtable_mod_union ();
sgen_gray_object_queue_dispose (&gc_thread_gray_queue);
TV_GETTIME (total_end);
- gc_stats.major_gc_time += TV_ELAPSED (total_start, total_end);
+ UnlockedAdd64 (&gc_stats.major_gc_time, TV_ELAPSED (total_start, total_end));
current_collection_generation = -1;
}
int
sgen_gc_collection_count (int generation)
{
- if (generation == 0)
- return gc_stats.minor_gc_count;
- return gc_stats.major_gc_count;
+ return InterlockedRead (generation == GENERATION_NURSERY ? &gc_stats.minor_gc_count : &gc_stats.major_gc_count);
}
size_t