From 1f19829f9165b787ab426ecfd16e7ec92124c6b5 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Sat, 1 Jul 2017 01:47:46 +0300 Subject: [PATCH] [sgen] Add explicit binary protocol stats for collections Using the cardtable scan and finish_gray_stack start and end events is cumbersome to parse and require a lot of logging, especially in the case of the parallel collector. --- mono/metadata/sgen-client-mono.h | 10 ++++++ mono/sgen/sgen-gc.c | 52 +++++++++++++++++++++++++++++--- mono/sgen/sgen-protocol-def.h | 14 +++++++++ mono/sgen/sgen-workers.c | 21 +++++++++++++ mono/sgen/sgen-workers.h | 9 ++++++ 5 files changed, 102 insertions(+), 4 deletions(-) diff --git a/mono/metadata/sgen-client-mono.h b/mono/metadata/sgen-client-mono.h index 78c0126396c..ab59e4f639d 100644 --- a/mono/metadata/sgen-client-mono.h +++ b/mono/metadata/sgen-client-mono.h @@ -690,6 +690,16 @@ sgen_client_binary_protocol_pin_stats (int objects_pinned_in_nursery, size_t byt { } +static void G_GNUC_UNUSED +sgen_client_binary_protocol_worker_finish_stats (int worker_index, int generation, gboolean forced, long long major_scan, long long los_scan, long long work_time) +{ +} + +static void G_GNUC_UNUSED +sgen_client_binary_protocol_collection_end_stats (long long major_scan, long long los_scan, long long finish_stack) +{ +} + #define TLAB_ACCESS_INIT SgenThreadInfo *__thread_info__ = (SgenThreadInfo*)mono_tls_get_sgen_thread_info () #define IN_CRITICAL_REGION (__thread_info__->client_info.in_critical_region) diff --git a/mono/sgen/sgen-gc.c b/mono/sgen/sgen-gc.c index a58d236da1f..ea67897afca 100644 --- a/mono/sgen/sgen-gc.c +++ b/mono/sgen/sgen-gc.c @@ -1449,6 +1449,9 @@ job_scan_major_card_table (void *worker_data_untyped, SgenThreadPoolJob *job) major_collector.scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count); SGEN_TV_GETTIME (btv); time_minor_scan_major_blocks += SGEN_TV_ELAPSED (atv, btv); + + if (worker_data_untyped) + ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv); } static void @@ -1463,6 +1466,9 @@ job_scan_los_card_table (void *worker_data_untyped, SgenThreadPoolJob *job) sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, job_data->job_index, job_data->job_split_count); SGEN_TV_GETTIME (btv); time_minor_scan_los += SGEN_TV_ELAPSED (atv, btv); + + if (worker_data_untyped) + ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv); } static void @@ -1478,6 +1484,9 @@ job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJo major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count); SGEN_TV_GETTIME (btv); time_major_scan_mod_union_blocks += SGEN_TV_ELAPSED (atv, btv); + + if (worker_data_untyped) + ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv); } static void @@ -1493,28 +1502,43 @@ job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, job_data->job_split_count); SGEN_TV_GETTIME (btv); time_major_scan_mod_union_los += SGEN_TV_ELAPSED (atv, btv); + + if (worker_data_untyped) + ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv); } static void job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job) { + SGEN_TV_DECLARE (atv); + SGEN_TV_DECLARE (btv); ParallelScanJob *job_data = (ParallelScanJob*)job; ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data); g_assert (concurrent_collection_in_progress); - + SGEN_TV_GETTIME (atv); major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count); + SGEN_TV_GETTIME (btv); + + g_assert (worker_data_untyped); + ((WorkerData*)worker_data_untyped)->major_scan_time += SGEN_TV_ELAPSED (atv, btv); } static void job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job) { + SGEN_TV_DECLARE (atv); + SGEN_TV_DECLARE (btv); ParallelScanJob *job_data = (ParallelScanJob*)job; ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data); g_assert (concurrent_collection_in_progress); - + SGEN_TV_GETTIME (atv); sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, job_data->job_split_count); + SGEN_TV_GETTIME (btv); + + g_assert (worker_data_untyped); + ((WorkerData*)worker_data_untyped)->los_scan_time += SGEN_TV_ELAPSED (atv, btv); } static void @@ -1662,6 +1686,9 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_ TV_DECLARE (btv); SGEN_TV_DECLARE (last_minor_collection_start_tv); SGEN_TV_DECLARE (last_minor_collection_end_tv); + guint64 major_scan_start = time_minor_scan_major_blocks; + guint64 los_scan_start = time_minor_scan_los; + guint64 finish_gray_start = time_minor_finish_gray_stack; if (disable_minor_collections) return TRUE; @@ -1856,6 +1883,14 @@ collect_nursery (const char *reason, gboolean is_overflow, SgenGrayQueue *unpin_ current_collection_generation = -1; objects_pinned = 0; + if (is_parallel) + binary_protocol_collection_end_stats (0, 0, time_minor_finish_gray_stack - finish_gray_start); + else + binary_protocol_collection_end_stats ( + time_minor_scan_major_blocks - major_scan_start, + time_minor_scan_los - los_scan_start, + time_minor_finish_gray_stack - finish_gray_start); + binary_protocol_collection_end (gc_stats.minor_gc_count - 1, GENERATION_NURSERY, 0, 0); if (check_nursery_objects_pinned && !sgen_minor_collector.is_split) @@ -2150,8 +2185,9 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason mword fragment_total; TV_DECLARE (atv); TV_DECLARE (btv); - - TV_GETTIME (btv); + guint64 major_scan_start = time_major_scan_mod_union_blocks; + guint64 los_scan_start = time_major_scan_mod_union_los; + guint64 finish_gray_start = time_major_finish_gray_stack; if (concurrent_collection_in_progress) { SgenObjectOperations *object_ops_par = NULL; @@ -2171,6 +2207,7 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason sgen_workers_assert_gray_queue_is_empty (GENERATION_OLD); + TV_GETTIME (btv); finish_gray_stack (GENERATION_OLD, CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, gc_thread_gray_queue)); TV_GETTIME (atv); time_major_finish_gray_stack += TV_ELAPSED (btv, atv); @@ -2272,6 +2309,13 @@ major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason binary_protocol_flush_buffers (FALSE); //consistency_check (); + if (major_collector.is_parallel) + binary_protocol_collection_end_stats (0, 0, time_major_finish_gray_stack - finish_gray_start); + else + binary_protocol_collection_end_stats ( + time_major_scan_mod_union_blocks - major_scan_start, + time_major_scan_mod_union_los - los_scan_start, + time_major_finish_gray_stack - finish_gray_start); binary_protocol_collection_end (gc_stats.major_gc_count - 1, GENERATION_OLD, counts.num_scanned_objects, counts.num_unique_scanned_objects); } diff --git a/mono/sgen/sgen-protocol-def.h b/mono/sgen/sgen-protocol-def.h index 08bfdfe890e..4842ce3b8ca 100644 --- a/mono/sgen/sgen-protocol-def.h +++ b/mono/sgen/sgen-protocol-def.h @@ -457,6 +457,20 @@ MATCH_INDEX (BINARY_PROTOCOL_MATCH) IS_VTABLE_MATCH (FALSE) END_PROTOCOL_ENTRY +BEGIN_PROTOCOL_ENTRY6 (binary_protocol_worker_finish_stats, TYPE_INT, worker_index, TYPE_INT, generation, TYPE_BOOL, forced, TYPE_LONGLONG, major_scan, TYPE_LONGLONG, los_scan, TYPE_LONGLONG, work_time) +DEFAULT_PRINT () +IS_ALWAYS_MATCH (TRUE) +MATCH_INDEX (BINARY_PROTOCOL_MATCH) +IS_VTABLE_MATCH (FALSE) +END_PROTOCOL_ENTRY + +BEGIN_PROTOCOL_ENTRY3 (binary_protocol_collection_end_stats, TYPE_LONGLONG, major_scan, TYPE_LONGLONG, los_scan, TYPE_LONGLONG, finish_stack) +DEFAULT_PRINT () +IS_ALWAYS_MATCH (TRUE) +MATCH_INDEX (BINARY_PROTOCOL_MATCH) +IS_VTABLE_MATCH (FALSE) +END_PROTOCOL_ENTRY + #undef BEGIN_PROTOCOL_ENTRY0 #undef BEGIN_PROTOCOL_ENTRY1 #undef BEGIN_PROTOCOL_ENTRY2 diff --git a/mono/sgen/sgen-workers.c b/mono/sgen/sgen-workers.c index f999a5bdb2e..7f926b4f149 100644 --- a/mono/sgen/sgen-workers.c +++ b/mono/sgen/sgen-workers.c @@ -92,6 +92,9 @@ sgen_workers_ensure_awake (WorkerContext *context) break; did_set_state = set_state (&context->workers_data [i], old_state, STATE_WORK_ENQUEUED); + + if (did_set_state && old_state == STATE_NOT_WORKING) + context->workers_data [i].last_start = sgen_timestamp (); } while (!did_set_state); if (!state_is_working_or_enqueued (old_state)) @@ -108,6 +111,7 @@ worker_try_finish (WorkerData *data) State old_state; int i, working = 0; WorkerContext *context = data->context; + gint64 last_start = data->last_start; ++stat_workers_num_finished; @@ -130,6 +134,12 @@ worker_try_finish (WorkerData *data) /* Make sure each worker has a chance of seeing the enqueued jobs */ sgen_workers_ensure_awake (context); SGEN_ASSERT (0, data->state == STATE_WORK_ENQUEUED, "Why did we fail to set our own state to ENQUEUED"); + + /* + * Log to be able to get the duration of normal concurrent M&S phase. + * Worker indexes are 1 based, since 0 is logically considered gc thread. + */ + binary_protocol_worker_finish_stats (data - &context->workers_data [0] + 1, context->generation, context->forced_stop, data->major_scan_time, data->los_scan_time, data->total_time + sgen_timestamp () - last_start); goto work_available; } } @@ -154,6 +164,9 @@ worker_try_finish (WorkerData *data) context->workers_finished = TRUE; mono_os_mutex_unlock (&context->finished_lock); + data->total_time += (sgen_timestamp () - last_start); + binary_protocol_worker_finish_stats (data - &context->workers_data [0] + 1, context->generation, context->forced_stop, data->major_scan_time, data->los_scan_time, data->total_time); + binary_protocol_worker_finish (sgen_timestamp (), context->forced_stop); sgen_gray_object_queue_trim_free_list (&data->private_gray_queue); @@ -454,6 +467,7 @@ void sgen_workers_start_all_workers (int generation, SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par, SgenWorkersFinishCallback callback) { WorkerContext *context = &worker_contexts [generation]; + int i; SGEN_ASSERT (0, !context->started, "Why are we starting to work without finishing previous cycle"); context->idle_func_object_ops_par = object_ops_par; @@ -462,6 +476,13 @@ sgen_workers_start_all_workers (int generation, SgenObjectOperations *object_ops context->finish_callback = callback; context->worker_awakenings = 0; context->started = TRUE; + + for (i = 0; i < context->active_workers_num; i++) { + context->workers_data [i].major_scan_time = 0; + context->workers_data [i].los_scan_time = 0; + context->workers_data [i].total_time = 0; + context->workers_data [i].last_start = 0; + } mono_memory_write_barrier (); /* diff --git a/mono/sgen/sgen-workers.h b/mono/sgen/sgen-workers.h index 42dc16ac4ae..e3147f051af 100644 --- a/mono/sgen/sgen-workers.h +++ b/mono/sgen/sgen-workers.h @@ -32,6 +32,15 @@ struct _WorkerData { */ gpointer free_block_lists; WorkerContext *context; + + /* Work time distribution. Measured in ticks. */ + gint64 major_scan_time, los_scan_time, total_time; + /* + * When changing the state of the worker from not working to work enqueued + * we set the timestamp so we can compute for how long the worker did actual + * work during the phase + */ + gint64 last_start; }; struct _WorkerContext { -- 2.25.1