{
}
+/*
+ * Binary-protocol hook invoked when a GC worker finishes, carrying the
+ * worker's per-phase scan timings. Intentionally empty — presumably the
+ * no-op fallback used when binary protocol logging is compiled out
+ * (G_GNUC_UNUSED silences the unused-static-function warning) — TODO confirm
+ * against the logging-enabled build.
+ */
+static void G_GNUC_UNUSED
+sgen_client_binary_protocol_worker_finish_stats (int worker_index, int generation, gboolean forced, long long major_scan, long long los_scan, long long work_time)
+{
+}
+
+/*
+ * Binary-protocol hook invoked at collection end with aggregate scan/finish
+ * timings. Intentionally empty — presumably the no-op fallback used when
+ * binary protocol logging is compiled out — TODO confirm against the
+ * logging-enabled build.
+ */
+static void G_GNUC_UNUSED
+sgen_client_binary_protocol_collection_end_stats (long long major_scan, long long los_scan, long long finish_stack)
+{
+}
+
#define TLAB_ACCESS_INIT SgenThreadInfo *__thread_info__ = (SgenThreadInfo*)mono_tls_get_sgen_thread_info ()
#define IN_CRITICAL_REGION (__thread_info__->client_info.in_critical_region)
return *slot != NULL;
}
+/*
+ * Removes all NULL pointers from the array, compacting the non-NULL entries
+ * towards the start while preserving their relative order, then shrinks
+ * next_slot (and slot_hint) to the new count. Not thread safe.
+ */
+void
+sgen_array_list_remove_nulls (SgenArrayList *array)
+{
+ guint32 start = 0;
+ volatile gpointer *slot;
+ gboolean skipped = FALSE;
+
+ SGEN_ARRAY_LIST_FOREACH_SLOT (array, slot) {
+ if (*slot) {
+ /* Copy the entry down to the next free position. */
+ *sgen_array_list_get_slot (array, start++) = *slot;
+ /* Clear the old slot only if the entry actually moved (a NULL was seen earlier). */
+ if (skipped)
+ *slot = NULL;
+ } else {
+ skipped = TRUE;
+ }
+ } SGEN_ARRAY_LIST_END_FOREACH_SLOT;
+
+ /* Make the compacted contents visible before publishing the new size. */
+ mono_memory_write_barrier ();
+ array->next_slot = start;
+ array->slot_hint = start;
+}
+
#endif
guint32 sgen_array_list_find (SgenArrayList *array, gpointer ptr);
gboolean sgen_array_list_default_cas_setter (volatile gpointer *slot, gpointer ptr, int data);
gboolean sgen_array_list_default_is_slot_set (volatile gpointer *slot);
-
+void sgen_array_list_remove_nulls (SgenArrayList *array);
#endif
static guint64 time_major_pinning = 0;
static guint64 time_major_scan_pinned = 0;
static guint64 time_major_scan_roots = 0;
-static guint64 time_major_scan_mod_union = 0;
+static guint64 time_major_scan_mod_union_blocks = 0;
+static guint64 time_major_scan_mod_union_los = 0;
static guint64 time_major_finish_gray_stack = 0;
static guint64 time_major_free_bigobjs = 0;
static guint64 time_major_los_sweep = 0;
mono_counters_register ("Major pinning", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_pinning);
mono_counters_register ("Major scan pinned", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_pinned);
mono_counters_register ("Major scan roots", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_roots);
- mono_counters_register ("Major scan mod union", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union);
+ mono_counters_register ("Major scan mod union blocks", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union_blocks);
+ mono_counters_register ("Major scan mod union los", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_scan_mod_union_los);
mono_counters_register ("Major finish gray stack", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_finish_gray_stack);
mono_counters_register ("Major free big objects", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_free_bigobjs);
mono_counters_register ("Major LOS sweep", MONO_COUNTER_GC | MONO_COUNTER_ULONG | MONO_COUNTER_TIME, &time_major_los_sweep);
typedef struct {
ScanJob scan_job;
int job_index, job_split_count;
+ /* Job-specific payload; for major card table scan jobs this is the number
+ of major blocks each job scans (num_major_sections / job_split_count),
+ passed through as block_count. */
+ int data;
} ParallelScanJob;
static ScanCopyContext
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
SGEN_TV_GETTIME (atv);
- major_collector.scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count);
+ major_collector.scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count, job_data->data);
SGEN_TV_GETTIME (btv);
time_minor_scan_major_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count);
SGEN_TV_GETTIME (btv);
time_minor_scan_los += SGEN_TV_ELAPSED (atv, btv);
+
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
/*
 * Job: scan this job's share of the major heap mod union card table during a
 * concurrent collection. The elapsed time is accumulated globally in
 * time_major_scan_mod_union_blocks and, when run on a worker thread, in that
 * worker's major_scan_time.
 */
static void
job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
- major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count);
+ SGEN_TV_GETTIME (atv);
+ major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count, job_data->data);
+ SGEN_TV_GETTIME (btv);
+ time_major_scan_mod_union_blocks += SGEN_TV_ELAPSED (atv, btv);
+
+ /* worker_data is NULL when the job runs outside a worker (on the GC thread), so no per-worker stats. */
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
/*
 * Job: scan this job's share of the LOS (large object space) mod union card
 * table during a concurrent collection. The elapsed time is accumulated
 * globally in time_major_scan_mod_union_los and, when run on a worker thread,
 * in that worker's los_scan_time.
 */
static void
job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
+ SGEN_TV_GETTIME (atv);
sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count);
+ SGEN_TV_GETTIME (btv);
+ time_major_scan_mod_union_los += SGEN_TV_ELAPSED (atv, btv);
+
+ /* worker_data is NULL when the job runs outside a worker (on the GC thread), so no per-worker stats. */
+ if (worker_data_untyped)
+ ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
/*
 * Job: preclean this job's share of the major heap mod union card table
 * during a concurrent collection, accumulating the elapsed time in the
 * worker's major_scan_time. Unlike the non-preclean variant, this job is
 * asserted to always run on a worker thread.
 */
static void
job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
+ SGEN_TV_GETTIME (atv);
+ major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count, job_data->data);
+ SGEN_TV_GETTIME (btv);
- major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count);
+ g_assert (worker_data_untyped);
+ ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
/*
 * Job: preclean this job's share of the LOS mod union card table during a
 * concurrent collection, accumulating the elapsed time in the worker's
 * los_scan_time. Asserted to always run on a worker thread.
 */
static void
job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
{
+ SGEN_TV_DECLARE (atv);
+ SGEN_TV_DECLARE (btv);
ParallelScanJob *job_data = (ParallelScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
-
+ SGEN_TV_GETTIME (atv);
sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count);
+ SGEN_TV_GETTIME (btv);
+
+ g_assert (worker_data_untyped);
+ ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv);
}
static void
{
ParallelScanJob *psj;
ScanJob *sj;
+ size_t num_major_sections = major_collector.get_num_major_sections ();
int split_count = sgen_workers_get_job_split_count (GENERATION_OLD);
int i;
/* Mod union preclean jobs */
psj->scan_job.gc_thread_gray_queue = NULL;
psj->job_index = i;
psj->job_split_count = split_count;
+ psj->data = num_major_sections / split_count;
sgen_workers_enqueue_job (GENERATION_OLD, &psj->scan_job.job, TRUE);
}
enqueue_scan_remembered_set_jobs (SgenGrayQueue *gc_thread_gray_queue, SgenObjectOperations *ops, gboolean enqueue)
{
int i, split_count = sgen_workers_get_job_split_count (GENERATION_NURSERY);
+ size_t num_major_sections = major_collector.get_num_major_sections ();
ScanJob *sj;
sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan wbroots", job_scan_wbroots, sizeof (ScanJob));
psj->scan_job.gc_thread_gray_queue = gc_thread_gray_queue;
psj->job_index = i;
psj->job_split_count = split_count;
+ psj->data = num_major_sections / split_count;
sgen_workers_enqueue_job (GENERATION_NURSERY, &psj->scan_job.job, enqueue);
psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan LOS remsets", job_scan_los_card_table, sizeof (ParallelScanJob));
TV_DECLARE (btv);
SGEN_TV_DECLARE (last_minor_collection_start_tv);
SGEN_TV_DECLARE (last_minor_collection_end_tv);
+ guint64 major_scan_start = time_minor_scan_major_blocks;
+ guint64 los_scan_start = time_minor_scan_los;
+ guint64 finish_gray_start = time_minor_finish_gray_stack;
if (disable_minor_collections)
return TRUE;
current_collection_generation = -1;
objects_pinned = 0;
+ if (is_parallel)
+ binary_protocol_collection_end_stats (0, 0, time_minor_finish_gray_stack - finish_gray_start);
+ else
+ binary_protocol_collection_end_stats (
+ time_minor_scan_major_blocks - major_scan_start,
+ time_minor_scan_los - los_scan_start,
+ time_minor_finish_gray_stack - finish_gray_start);
+
binary_protocol_collection_end (gc_stats.minor_gc_count - 1, GENERATION_NURSERY, 0, 0);
if (check_nursery_objects_pinned && !sgen_minor_collector.is_split)
if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
if (object_ops_par != NULL)
sgen_workers_set_num_active_workers (GENERATION_OLD, 0);
- if (sgen_workers_have_idle_work (GENERATION_OLD)) {
+ if (object_ops_par == NULL && sgen_workers_have_idle_work (GENERATION_OLD)) {
/*
* We force the finish of the worker with the new object ops context
- * which can also do copying. We need to have finished pinning.
+ * which can also do copying. We need to have finished pinning. On the
+ * parallel collector, there is no need to drain the private queues
+ * here, since we can do it as part of the finishing work, achieving
+ * better work distribution.
*/
sgen_workers_start_all_workers (GENERATION_OLD, object_ops_nopar, object_ops_par, NULL);
if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
int i, split_count = sgen_workers_get_job_split_count (GENERATION_OLD);
+ size_t num_major_sections = major_collector.get_num_major_sections ();
gboolean parallel = object_ops_par != NULL;
/* If we're not parallel we finish the collection on the gc thread */
psj->scan_job.gc_thread_gray_queue = gc_thread_gray_queue;
psj->job_index = i;
psj->job_split_count = split_count;
+ psj->data = num_major_sections / split_count;
sgen_workers_enqueue_job (GENERATION_OLD, &psj->scan_job.job, parallel);
psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan LOS mod union cardtable", job_scan_los_mod_union_card_table, sizeof (ParallelScanJob));
mword fragment_total;
TV_DECLARE (atv);
TV_DECLARE (btv);
-
- TV_GETTIME (btv);
+ guint64 major_scan_start = time_major_scan_mod_union_blocks;
+ guint64 los_scan_start = time_major_scan_mod_union_los;
+ guint64 finish_gray_start = time_major_finish_gray_stack;
if (concurrent_collection_in_progress) {
SgenObjectOperations *object_ops_par = NULL;
sgen_workers_assert_gray_queue_is_empty (GENERATION_OLD);
+ TV_GETTIME (btv);
finish_gray_stack (GENERATION_OLD, CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, gc_thread_gray_queue));
TV_GETTIME (atv);
time_major_finish_gray_stack += TV_ELAPSED (btv, atv);
binary_protocol_flush_buffers (FALSE);
//consistency_check ();
+ if (major_collector.is_parallel)
+ binary_protocol_collection_end_stats (0, 0, time_major_finish_gray_stack - finish_gray_start);
+ else
+ binary_protocol_collection_end_stats (
+ time_major_scan_mod_union_blocks - major_scan_start,
+ time_major_scan_mod_union_los - los_scan_start,
+ time_major_finish_gray_stack - finish_gray_start);
binary_protocol_collection_end (gc_stats.major_gc_count - 1, GENERATION_OLD, counts.num_scanned_objects, counts.num_unique_scanned_objects);
}
/*
* Use concurrent major and dynamic nursery with a more
* aggressive shrinking relative to pause times.
- * FIXME use parallel minors
*/
- minor = SGEN_MINOR_SIMPLE;
+ minor = SGEN_MINOR_SIMPLE_PARALLEL;
major = SGEN_MAJOR_CONCURRENT;
dynamic_nursery = TRUE;
sgen_max_pause_margin = SGEN_PAUSE_MODE_MAX_PAUSE_MARGIN;
void (*free_non_pinned_object) (GCObject *obj, size_t size);
void (*pin_objects) (SgenGrayQueue *queue);
void (*pin_major_object) (GCObject *obj, SgenGrayQueue *queue);
- void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count);
+ void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count, int block_count);
void (*iterate_live_block_ranges) (sgen_cardtable_block_callback callback);
void (*iterate_block_ranges) (sgen_cardtable_block_callback callback);
void (*update_cardtable_mod_union) (void);
/* non-allocated block free-list */
static void *empty_blocks = NULL;
static size_t num_empty_blocks = 0;
+static gboolean compact_blocks = FALSE;
/*
* We can iterate the block list also while sweep is in progress but we
(bl) = BLOCK_UNTAG ((bl));
#define END_FOREACH_BLOCK_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
+/*
+ * Iterates the blocks of allocated_blocks whose index lies in the range
+ * begin..end (presumably half-open, per SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE —
+ * verify against its definition), skipping NULL slots. Sets (bl) to the
+ * untagged block, (index) to its slot index and (hr) to whether the block has
+ * references. Used to split card table scanning into contiguous per-worker
+ * ranges.
+ */
+#define FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK(bl,begin,end,index,hr) { \
+ volatile gpointer *slot; \
+ SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&allocated_blocks, begin, end, slot, index) { \
+ (bl) = (MSBlockInfo *) (*slot); \
+ if (!(bl)) \
+ continue; \
+ (hr) = BLOCK_IS_TAGGED_HAS_REFERENCES ((bl)); \
+ (bl) = BLOCK_UNTAG ((bl));
+/* Closes the range iteration opened by FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK. */
+#define END_FOREACH_BLOCK_RANGE_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; }
+
static volatile size_t num_major_sections = 0;
/*
* One free block list for each block object size. We add and remove blocks from these
sgen_workers_foreach (GENERATION_NURSERY, sgen_worker_clear_free_block_lists);
sgen_workers_foreach (GENERATION_OLD, sgen_worker_clear_free_block_lists);
+
+ compact_blocks = TRUE;
}
static void sweep_finish (void);
#endif
old_num_major_sections = num_major_sections;
+
+ /* Compact the block list if it hasn't been compacted in a while and nobody is using it */
+ if (compact_blocks && !sweep_in_progress () && !sweep_blocks_job && !sgen_concurrent_collection_in_progress ()) {
+ /*
+ * We support null elements in the array but do regular compaction to avoid
+ * excessive traversal of the array and to facilitate splitting into well
+ * balanced sections for parallel modes. We compact as soon as possible after
+ * sweep.
+ */
+ sgen_array_list_remove_nulls (&allocated_blocks);
+ compact_blocks = FALSE;
+ }
}
static void
}
/*
 * Scans the card table for this job's contiguous range of major heap blocks.
 * block_count is the per-job block quota (computed by the enqueuer as
 * num_major_sections / job_split_count); the last job also takes the
 * left-over tail of allocated_blocks.
 */
static void
-major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
+major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count, int block_count)
{
MSBlockInfo *block;
gboolean has_references, was_sweeping, skip_scan;
+ int first_block, last_block, index;
+
+ /*
+ * The last_block's index is at least (num_major_sections - 1) since we
+ * can have nulls in the allocated_blocks list. The last worker will
+ * scan the left-overs of the list. We expect few null entries in the
+ * allocated_blocks list, therefore using num_major_sections for computing
+ * block_count shouldn't affect work distribution.
+ */
+ first_block = block_count * job_index;
+ if (job_index == job_split_count - 1)
+ last_block = allocated_blocks.next_slot;
+ else
+ last_block = block_count * (job_index + 1);
if (!concurrent_mark)
g_assert (scan_type == CARDTABLE_SCAN_GLOBAL);
was_sweeping = sweep_in_progress ();
binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
- FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
- if (__index % job_split_count != job_index)
- continue;
+ FOREACH_BLOCK_RANGE_HAS_REFERENCES_NO_LOCK (block, first_block, last_block, index, has_references) {
#ifdef PREFETCH_CARDS
- int prefetch_index = __index + 6 * job_split_count;
+ /* Each job now scans a contiguous range, so prefetch a fixed distance ahead. */
+ int prefetch_index = index + 6;
if (prefetch_index < allocated_blocks.next_slot) {
MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
PREFETCH_READ (prefetch_block);
}
}
#endif
-
if (!has_references)
continue;
skip_scan = FALSE;
* sweep start since we are in a nursery collection. Also avoid CAS-ing
*/
if (sweep_in_progress ()) {
- skip_scan = !ensure_block_is_checked_for_sweeping (__index, TRUE, NULL);
+ skip_scan = !ensure_block_is_checked_for_sweeping (index, TRUE, NULL);
} else if (was_sweeping) {
/* Recheck in case sweep finished after dereferencing the slot */
- skip_scan = *sgen_array_list_get_slot (&allocated_blocks, __index) == 0;
+ skip_scan = *sgen_array_list_get_slot (&allocated_blocks, index) == 0;
}
}
}
if (!skip_scan)
scan_card_table_for_block (block, scan_type, ctx);
- } END_FOREACH_BLOCK_NO_LOCK;
+ } END_FOREACH_BLOCK_RANGE_NO_LOCK;
binary_protocol_major_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
}
IS_VTABLE_MATCH (ptr == entry->value_vtable)
END_PROTOCOL_ENTRY_HEAVY
-BEGIN_PROTOCOL_ENTRY2 (binary_protocol_major_card_table_scan_start, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_major_card_table_scan_start, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
DEFAULT_PRINT ()
IS_ALWAYS_MATCH (TRUE)
MATCH_INDEX (BINARY_PROTOCOL_MATCH)
IS_VTABLE_MATCH (FALSE)
-END_PROTOCOL_ENTRY
+END_PROTOCOL_ENTRY_HEAVY
-BEGIN_PROTOCOL_ENTRY2 (binary_protocol_major_card_table_scan_end, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_major_card_table_scan_end, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
DEFAULT_PRINT ()
IS_ALWAYS_MATCH (TRUE)
MATCH_INDEX (BINARY_PROTOCOL_MATCH)
IS_VTABLE_MATCH (FALSE)
-END_PROTOCOL_ENTRY
+END_PROTOCOL_ENTRY_HEAVY
-BEGIN_PROTOCOL_ENTRY2 (binary_protocol_los_card_table_scan_start, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_los_card_table_scan_start, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
DEFAULT_PRINT ()
IS_ALWAYS_MATCH (TRUE)
MATCH_INDEX (BINARY_PROTOCOL_MATCH)
IS_VTABLE_MATCH (FALSE)
-END_PROTOCOL_ENTRY
+END_PROTOCOL_ENTRY_HEAVY
-BEGIN_PROTOCOL_ENTRY2 (binary_protocol_los_card_table_scan_end, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_los_card_table_scan_end, TYPE_LONGLONG, timestamp, TYPE_BOOL, mod_union)
DEFAULT_PRINT ()
IS_ALWAYS_MATCH (TRUE)
MATCH_INDEX (BINARY_PROTOCOL_MATCH)
IS_VTABLE_MATCH (FALSE)
-END_PROTOCOL_ENTRY
+END_PROTOCOL_ENTRY_HEAVY
-BEGIN_PROTOCOL_ENTRY2 (binary_protocol_finish_gray_stack_start, TYPE_LONGLONG, timestamp, TYPE_INT, generation)
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_finish_gray_stack_start, TYPE_LONGLONG, timestamp, TYPE_INT, generation)
DEFAULT_PRINT ()
IS_ALWAYS_MATCH (TRUE)
MATCH_INDEX (BINARY_PROTOCOL_MATCH)
IS_VTABLE_MATCH (FALSE)
-END_PROTOCOL_ENTRY
+END_PROTOCOL_ENTRY_HEAVY
-BEGIN_PROTOCOL_ENTRY2 (binary_protocol_finish_gray_stack_end, TYPE_LONGLONG, timestamp, TYPE_INT, generation)
+BEGIN_PROTOCOL_ENTRY_HEAVY2 (binary_protocol_finish_gray_stack_end, TYPE_LONGLONG, timestamp, TYPE_INT, generation)
DEFAULT_PRINT ()
IS_ALWAYS_MATCH (TRUE)
MATCH_INDEX (BINARY_PROTOCOL_MATCH)
IS_VTABLE_MATCH (FALSE)
-END_PROTOCOL_ENTRY
+END_PROTOCOL_ENTRY_HEAVY
BEGIN_PROTOCOL_ENTRY2 (binary_protocol_worker_finish, TYPE_LONGLONG, timestamp, TYPE_BOOL, forced)
DEFAULT_PRINT ()
IS_VTABLE_MATCH (FALSE)
END_PROTOCOL_ENTRY
+/* Per-worker timing stats (major/LOS scan and total work time, in ticks)
+   logged when a worker finishes its work phase. Worker indexes are 1-based;
+   0 is logically the GC thread. */
+BEGIN_PROTOCOL_ENTRY6 (binary_protocol_worker_finish_stats, TYPE_INT, worker_index, TYPE_INT, generation, TYPE_BOOL, forced, TYPE_LONGLONG, major_scan, TYPE_LONGLONG, los_scan, TYPE_LONGLONG, work_time)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (TRUE)
+MATCH_INDEX (BINARY_PROTOCOL_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY
+
+/* Aggregate scan/finish-gray-stack timings logged at collection end.
+   major_scan/los_scan are 0 for parallel collections, where scanning time is
+   reported per worker instead. */
+BEGIN_PROTOCOL_ENTRY3 (binary_protocol_collection_end_stats, TYPE_LONGLONG, major_scan, TYPE_LONGLONG, los_scan, TYPE_LONGLONG, finish_stack)
+DEFAULT_PRINT ()
+IS_ALWAYS_MATCH (TRUE)
+MATCH_INDEX (BINARY_PROTOCOL_MATCH)
+IS_VTABLE_MATCH (FALSE)
+END_PROTOCOL_ENTRY
+
+
#undef BEGIN_PROTOCOL_ENTRY0
#undef BEGIN_PROTOCOL_ENTRY1
#undef BEGIN_PROTOCOL_ENTRY2
static int threads_num;
static MonoNativeThreadId threads [SGEN_THREADPOOL_MAX_NUM_THREADS];
+static int threads_context [SGEN_THREADPOOL_MAX_NUM_THREADS];
static volatile gboolean threadpool_shutdown;
static volatile int threads_finished;
}
static mono_native_thread_return_t
-thread_func (int worker_index)
+thread_func (void *data)
{
+ int worker_index = (int)(gsize)data;
int current_context;
void *thread_data = NULL;
SgenThreadPoolJob *job = NULL;
SgenThreadPoolContext *context = NULL;
+ threads_context [worker_index] = -1;
get_work (worker_index, ¤t_context, &do_idle, &job);
+ threads_context [worker_index] = current_context;
if (!threadpool_shutdown) {
context = &pool_contexts [current_context];
}
void
-sgen_thread_pool_idle_wait (int context_id)
+sgen_thread_pool_idle_wait (int context_id, SgenThreadPoolContinueIdleWaitFunc continue_wait)
{
SGEN_ASSERT (0, pool_contexts [context_id].idle_job_func, "Why are we waiting for idle without an idle function?");
mono_os_mutex_lock (&lock);
- while (pool_contexts [context_id].continue_idle_job_func (NULL, context_id))
+ while (continue_wait (context_id, threads_context))
mono_os_cond_wait (&done_cond, &lock);
mono_os_mutex_unlock (&lock);
typedef void (*SgenThreadPoolIdleJobFunc) (void*);
typedef gboolean (*SgenThreadPoolContinueIdleJobFunc) (void*, int);
typedef gboolean (*SgenThreadPoolShouldWorkFunc) (void*);
+typedef gboolean (*SgenThreadPoolContinueIdleWaitFunc) (int, int*);
struct _SgenThreadPoolJob {
const char *name;
void sgen_thread_pool_job_wait (int context_id, SgenThreadPoolJob *job);
void sgen_thread_pool_idle_signal (int context_id);
-void sgen_thread_pool_idle_wait (int context_id);
+void sgen_thread_pool_idle_wait (int context_id, SgenThreadPoolContinueIdleWaitFunc continue_wait);
void sgen_thread_pool_wait_for_all_jobs (int context_id);
SGEN_ASSERT (0, old_state == STATE_WORKING, "We can only transition to NOT WORKING from WORKING");
else if (new_state == STATE_WORKING)
SGEN_ASSERT (0, old_state == STATE_WORK_ENQUEUED, "We can only transition to WORKING from WORK ENQUEUED");
- if (new_state == STATE_NOT_WORKING || new_state == STATE_WORKING)
- SGEN_ASSERT (6, sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "Only the worker thread is allowed to transition to NOT_WORKING or WORKING");
return InterlockedCompareExchange (&data->state, new_state, old_state) == old_state;
}
break;
did_set_state = set_state (&context->workers_data [i], old_state, STATE_WORK_ENQUEUED);
+
+ if (did_set_state && old_state == STATE_NOT_WORKING)
+ context->workers_data [i].last_start = sgen_timestamp ();
} while (!did_set_state);
if (!state_is_working_or_enqueued (old_state))
State old_state;
int i, working = 0;
WorkerContext *context = data->context;
+ gint64 last_start = data->last_start;
++stat_workers_num_finished;
/* Make sure each worker has a chance of seeing the enqueued jobs */
sgen_workers_ensure_awake (context);
SGEN_ASSERT (0, data->state == STATE_WORK_ENQUEUED, "Why did we fail to set our own state to ENQUEUED");
+
+ /*
+ * Log to be able to get the duration of normal concurrent M&S phase.
+ * Worker indexes are 1 based, since 0 is logically considered gc thread.
+ */
+ binary_protocol_worker_finish_stats (data - &context->workers_data [0] + 1, context->generation, context->forced_stop, data->major_scan_time, data->los_scan_time, data->total_time + sgen_timestamp () - last_start);
goto work_available;
}
}
context->workers_finished = TRUE;
mono_os_mutex_unlock (&context->finished_lock);
- binary_protocol_worker_finish (sgen_timestamp (), context->forced_stop);
+ data->total_time += (sgen_timestamp () - last_start);
+ binary_protocol_worker_finish_stats (data - &context->workers_data [0] + 1, context->generation, context->forced_stop, data->major_scan_time, data->los_scan_time, data->total_time);
sgen_gray_object_queue_trim_free_list (&data->private_gray_queue);
return;
}
}
+/*
+ * Returns TRUE while sgen_thread_pool_idle_wait should keep waiting for the
+ * workers of `calling_context`, FALSE once no more work remains and the
+ * stragglers have been finished. threads_context[i] holds the thread pool
+ * context that pool thread i is currently working in (set by thread_func).
+ * This is called with thread pool lock so no context switch can happen.
+ */
+static gboolean
+continue_idle_wait (int calling_context, int *threads_context)
+{
+ WorkerContext *context;
+ int i;
+
+ /* Map the thread pool context id back to its worker context (old or nursery generation). */
+ if (worker_contexts [GENERATION_OLD].workers_num && calling_context == worker_contexts [GENERATION_OLD].thread_pool_context)
+ context = &worker_contexts [GENERATION_OLD];
+ else if (worker_contexts [GENERATION_NURSERY].workers_num && calling_context == worker_contexts [GENERATION_NURSERY].thread_pool_context)
+ context = &worker_contexts [GENERATION_NURSERY];
+ else
+ g_assert_not_reached ();
+
+ /*
+ * We assume there are no pending jobs, since this is called only after
+ * we waited for all the jobs.
+ */
+ for (i = 0; i < context->active_workers_num; i++) {
+ if (threads_context [i] == calling_context)
+ return TRUE;
+ }
+
+ /* There is still idle work (e.g. queued gray objects) for this context to drain. */
+ if (sgen_workers_have_idle_work (context->generation) && !context->forced_stop)
+ return TRUE;
+
+ /*
+ * At this point there are no jobs to be done, and no objects to be scanned
+ * in the gray queues. We can simply asynchronously finish all the workers
+ * from the context that were not finished already (due to being stuck working
+ * in another context)
+ */
+
+ for (i = 0; i < context->active_workers_num; i++) {
+ if (context->workers_data [i].state == STATE_WORK_ENQUEUED)
+ set_state (&context->workers_data [i], STATE_WORK_ENQUEUED, STATE_WORKING);
+ if (context->workers_data [i].state == STATE_WORKING)
+ worker_try_finish (&context->workers_data [i]);
+ }
+
+ return FALSE;
+}
+
+
void
sgen_workers_stop_all_workers (int generation)
{
WorkerContext *context = &worker_contexts [generation];
+ mono_os_mutex_lock (&context->finished_lock);
context->finish_callback = NULL;
- mono_memory_write_barrier ();
+ mono_os_mutex_unlock (&context->finished_lock);
+
context->forced_stop = TRUE;
sgen_thread_pool_wait_for_all_jobs (context->thread_pool_context);
- sgen_thread_pool_idle_wait (context->thread_pool_context);
+ sgen_thread_pool_idle_wait (context->thread_pool_context, continue_idle_wait);
SGEN_ASSERT (0, !sgen_workers_are_working (context), "Can only signal enqueue work when in no work state");
context->started = FALSE;
sgen_workers_start_all_workers (int generation, SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par, SgenWorkersFinishCallback callback)
{
WorkerContext *context = &worker_contexts [generation];
+ int i;
SGEN_ASSERT (0, !context->started, "Why are we starting to work without finishing previous cycle");
context->idle_func_object_ops_par = object_ops_par;
context->finish_callback = callback;
context->worker_awakenings = 0;
context->started = TRUE;
+
+ for (i = 0; i < context->active_workers_num; i++) {
+ context->workers_data [i].major_scan_time = 0;
+ context->workers_data [i].los_scan_time = 0;
+ context->workers_data [i].total_time = 0;
+ context->workers_data [i].last_start = 0;
+ }
mono_memory_write_barrier ();
/*
WorkerContext *context = &worker_contexts [generation];
int i;
- /*
- * It might be the case that a worker didn't get to run anything
- * in this context, because it was stuck working on a long job
- * in another context. In this case its state is active (WORK_ENQUEUED)
- * and we need to wait for it to finish itself.
- * FIXME Avoid having to wait for the worker to report its own finish.
- */
+ SGEN_ASSERT (0, !context->finish_callback, "Why are we joining concurrent mark early");
sgen_thread_pool_wait_for_all_jobs (context->thread_pool_context);
- sgen_thread_pool_idle_wait (context->thread_pool_context);
+ sgen_thread_pool_idle_wait (context->thread_pool_context, continue_idle_wait);
SGEN_ASSERT (0, !sgen_workers_are_working (context), "Can only signal enqueue work when in no work state");
/* At this point all the workers have stopped. */
}
/*
- * Can only be called if the workers are stopped.
- * If we're stopped, there are also no pending jobs.
+ * Can only be called if the workers are not working in the
+ * context and there are no pending jobs.
*/
gboolean
sgen_workers_have_idle_work (int generation)
WorkerContext *context = &worker_contexts [generation];
int i;
- SGEN_ASSERT (0, context->forced_stop && !sgen_workers_are_working (context), "Checking for idle work should only happen if the workers are stopped.");
-
if (!sgen_section_gray_queue_is_empty (&context->workers_distribute_gray_queue))
return TRUE;
*/
gpointer free_block_lists;
WorkerContext *context;
+
+ /* Work time distribution. Measured in ticks. */
+ gint64 major_scan_time, los_scan_time, total_time;
+ /*
+ * When changing the state of the worker from not working to work enqueued
+ * we set the timestamp so we can compute for how long the worker did actual
+ * work during the phase
+ */
+ gint64 last_start;
};
struct _WorkerContext {