#define ARRAY_OBJ_INDEX(ptr,array,elem_size) (((char*)(ptr) - ((char*)(array) + G_STRUCT_OFFSET (MonoArray, vector))) / (elem_size))
gboolean
-sgen_client_cardtable_scan_object (GCObject *obj, mword block_obj_size, guint8 *cards, ScanCopyContext ctx)
+sgen_client_cardtable_scan_object (GCObject *obj, guint8 *cards, ScanCopyContext ctx)
{
MonoVTable *vt = SGEN_LOAD_VTABLE (obj);
MonoClass *klass = vt->klass;
libmono_profiler_vtune_la_LDFLAGS = $(prof_ldflags)
libmono_profiler_vtune_static_la_SOURCES = mono-profiler-vtune.c
libmono_profiler_vtune_static_la_LDFLAGS = -static
+libmono_profiler_vtune_static_la_CFLAGS = $(VTUNE_CFLAGS)
+libmono_profiler_vtune_static_la_LIBADD = $(VTUNE_LIBS)
endif
mprof_report_SOURCES = mprof-report.c
return index;
}
-/*
- * Removes all NULL pointers from the array. Not thread safe
- */
-void
-sgen_array_list_remove_nulls (SgenArrayList *array)
-{
- guint32 start = 0;
- volatile gpointer *slot;
-
- SGEN_ARRAY_LIST_FOREACH_SLOT (array, slot) {
- if (*slot)
- *sgen_array_list_get_slot (array, start++) = *slot;
- } SGEN_ARRAY_LIST_END_FOREACH_SLOT;
-
- mono_memory_write_barrier ();
- array->next_slot = start;
-}
-
/*
* Does a linear search through the pointer array to find `ptr`. Returns the index if
* found, otherwise (guint32)-1.
return (guint32)-1;
}
+/*
+ * Default slot setter for SgenArrayList: CAS `ptr` into an empty (NULL)
+ * slot. The `data` argument is unused here; it exists only to match the
+ * generic slot-setter signature. Returns TRUE iff this caller installed
+ * the value (i.e. won the race against concurrent setters).
+ */
+gboolean
+sgen_array_list_default_cas_setter (volatile gpointer *slot, gpointer ptr, int data)
+{
+	if (InterlockedCompareExchangePointer (slot, ptr, NULL) == NULL)
+		return TRUE;
+	return FALSE;
+}
+
+/* Default occupancy check: a slot counts as set when it is non-NULL. */
+gboolean
+sgen_array_list_default_is_slot_set (volatile gpointer *slot)
+{
+	return *slot != NULL;
+}
+
#endif
guint32 sgen_array_list_alloc_block (SgenArrayList *array, guint32 slots_to_add);
guint32 sgen_array_list_add (SgenArrayList *array, gpointer ptr, int data, gboolean increase_size_before_set);
guint32 sgen_array_list_find (SgenArrayList *array, gpointer ptr);
-void sgen_array_list_remove_nulls (SgenArrayList *array);
+gboolean sgen_array_list_default_cas_setter (volatile gpointer *slot, gpointer ptr, int data);
+gboolean sgen_array_list_default_is_slot_set (volatile gpointer *slot);
+
#endif
sgen_card_table_clear_cards ();
#endif
SGEN_TV_GETTIME (atv);
- sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx);
+ sgen_get_major_collector ()->scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
SGEN_TV_GETTIME (btv);
last_major_scan_time = SGEN_TV_ELAPSED (atv, btv);
major_card_scan_time += last_major_scan_time;
- sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx);
+ sgen_los_scan_card_table (CARDTABLE_SCAN_GLOBAL, ctx, 0, 1);
SGEN_TV_GETTIME (atv);
last_los_scan_time = SGEN_TV_ELAPSED (btv, atv);
los_card_scan_time += last_los_scan_time;
{
HEAVY_STAT (++large_objects);
- if (sgen_client_cardtable_scan_object (obj, block_obj_size, cards, ctx))
+ if (sgen_client_cardtable_scan_object (obj, cards, ctx))
return;
HEAVY_STAT (++bloby_objects);
* parts of the object based on which cards are marked, do so and return TRUE. Otherwise,
* return FALSE.
*/
-gboolean sgen_client_cardtable_scan_object (GCObject *obj, mword block_obj_size, guint8 *cards, ScanCopyContext ctx);
+gboolean sgen_client_cardtable_scan_object (GCObject *obj, guint8 *cards, ScanCopyContext ctx);
/*
* Called after nursery objects have been pinned. No action is necessary.
extern guint64 stat_slots_allocated_in_vain;
/*
- * Copies an object and enqueues it if a queue is given.
- *
* This function can be used even if the vtable of obj is not valid
* anymore, which is the case in the parallel collector.
*/
static MONO_ALWAYS_INLINE void
-par_copy_object_no_checks (char *destination, GCVTable vt, void *obj, mword objsize, SgenGrayQueue *queue)
+par_copy_object_no_checks (char *destination, GCVTable vt, void *obj, mword objsize)
{
sgen_client_pre_copy_checks (destination, vt, obj, objsize);
binary_protocol_copy (obj, destination, vt, objsize);
SGEN_ASSERT (9, sgen_vtable_get_descriptor (vt), "vtable %p has no gc descriptor", vt);
sgen_client_update_copied_object (destination, vt, obj, objsize);
- obj = destination;
- if (queue) {
- SGEN_LOG (9, "Enqueuing gray object %p (%s)", obj, sgen_client_vtable_get_name (vt));
- GRAY_OBJECT_ENQUEUE (queue, (GCObject *)obj, sgen_vtable_get_descriptor (vt));
- }
}
/*
+ * Copies an object and enqueues it if a queue is given.
* This can return OBJ itself on OOM.
*/
static MONO_NEVER_INLINE GCObject *
GCVTable vt = SGEN_LOAD_VTABLE_UNCHECKED (obj);
gboolean has_references = SGEN_VTABLE_HAS_REFERENCES (vt);
mword objsize = SGEN_ALIGN_UP (sgen_client_par_object_get_size (vt, obj));
- /* FIXME: Does this not mark the newly allocated object? */
void *destination = COLLECTOR_SERIAL_ALLOC_FOR_PROMOTION (vt, obj, objsize, has_references);
if (G_UNLIKELY (!destination)) {
return obj;
}
- if (!has_references)
- queue = NULL;
-
- par_copy_object_no_checks ((char *)destination, vt, obj, objsize, queue);
- /* FIXME: mark mod union cards if necessary */
+ par_copy_object_no_checks ((char *)destination, vt, obj, objsize);
/* set the forwarding pointer */
SGEN_FORWARD_OBJECT (obj, destination);
+ if (has_references) {
+ SGEN_LOG (9, "Enqueuing gray object %p (%s)", destination, sgen_client_vtable_get_name (vt));
+ GRAY_OBJECT_ENQUEUE_SERIAL (queue, (GCObject *)destination, sgen_vtable_get_descriptor (vt));
+ }
+
return (GCObject *)destination;
}
+#if defined(COPY_OR_MARK_PARALLEL)
+/*
+ * Parallel variant of object copying. Multiple GC workers may race to copy
+ * the same object; the forwarding CAS in SGEN_FORWARD_OBJECT_PAR decides the
+ * winner, and only the winning worker enqueues the copy for scanning.
+ * Returns the canonical forwarded copy of `obj`.
+ */
+static MONO_NEVER_INLINE GCObject *
+copy_object_no_checks_par (GCObject *obj, SgenGrayQueue *queue)
+{
+	mword vtable_word = *(mword*)obj;
+	GCObject *destination;
+
+	/* Fast path: another worker already forwarded this object. */
+	destination = (GCObject*) SGEN_VTABLE_IS_FORWARDED (vtable_word);
+
+	if (!destination) {
+		GCVTable vt = (GCVTable) vtable_word;
+		GCObject *final_destination;
+		/*
+		 * At this point we know vt is not tagged and we shouldn't access the vtable through obj
+		 * since it could get copied at any time by another thread.
+		 */
+		gboolean has_references = SGEN_VTABLE_HAS_REFERENCES (vt);
+		mword objsize = SGEN_ALIGN_UP (sgen_client_par_object_get_size (vt, obj));
+		destination = major_collector.alloc_object_par (vt, objsize, has_references);
+
+		par_copy_object_no_checks ((char*)destination, vt, obj, objsize);
+
+		/* FIXME we might need a membar here so other threads see the vtable before we forward */
+
+		/* set the forwarding pointer */
+		SGEN_FORWARD_OBJECT_PAR (obj, destination, final_destination);
+
+		if (destination == final_destination) {
+			/* In a racing case, only the worker that allocated the object enqueues it */
+			if (has_references) {
+				SGEN_LOG (9, "Enqueuing gray object %p (%s)", destination, sgen_client_vtable_get_name (vt));
+				GRAY_OBJECT_ENQUEUE_PARALLEL (queue, (GCObject *)destination, sgen_vtable_get_descriptor (vt));
+			}
+		} else {
+			/* We lost the forwarding race; use the winner's copy instead. */
+			destination = final_destination;
+		}
+	}
+
+	return destination;
+}
+#endif
+
+
#undef COLLECTOR_SERIAL_ALLOC_FOR_PROMOTION
#undef collector_pin_object
+#undef COPY_OR_MARK_PARALLEL
}
static void
-gray_queue_enable_redirect (SgenGrayQueue *queue)
+gray_queue_redirect (SgenGrayQueue *queue)
{
SGEN_ASSERT (0, concurrent_collection_in_progress, "Where are we redirecting the gray queue to, without a concurrent collection?");
- sgen_gray_queue_set_alloc_prepare (queue, sgen_workers_take_from_queue_and_awake);
- sgen_workers_take_from_queue_and_awake (queue);
+ sgen_workers_take_from_queue (queue);
}
void
gboolean
sgen_drain_gray_stack (ScanCopyContext ctx)
{
- ScanObjectFunc scan_func = ctx.ops->scan_object;
- SgenGrayQueue *queue = ctx.queue;
+ SGEN_ASSERT (0, ctx.ops->drain_gray_stack, "Why do we have a scan/copy context with a missing drain gray stack function?");
- if (ctx.ops->drain_gray_stack)
- return ctx.ops->drain_gray_stack (queue);
-
- for (;;) {
- GCObject *obj;
- SgenDescriptor desc;
- GRAY_OBJECT_DEQUEUE (queue, &obj, &desc);
- if (!obj)
- return TRUE;
- SGEN_LOG (9, "Precise gray object scan %p (%s)", obj, sgen_client_vtable_get_name (SGEN_LOAD_VTABLE (obj)));
- scan_func (obj, desc, queue);
- }
- return FALSE;
+ return ctx.ops->drain_gray_stack (ctx.queue);
}
/*
safe_object_get_size (obj_to_pin));
pin_object (obj_to_pin);
- GRAY_OBJECT_ENQUEUE (queue, obj_to_pin, desc);
+ GRAY_OBJECT_ENQUEUE_SERIAL (queue, obj_to_pin, desc);
sgen_pin_stats_register_object (obj_to_pin, GENERATION_NURSERY);
definitely_pinned [count] = obj_to_pin;
count++;
++objects_pinned;
sgen_pin_stats_register_object (object, GENERATION_NURSERY);
- GRAY_OBJECT_ENQUEUE (queue, object, sgen_obj_get_descriptor_safe (object));
+ GRAY_OBJECT_ENQUEUE_SERIAL (queue, object, sgen_obj_get_descriptor_safe (object));
}
/* Sort the addresses in array in increasing order.
SgenGrayQueue *gc_thread_gray_queue;
} ScanJob;
+typedef struct {
+	/* Must be the first field: jobs are allocated as ParallelScanJob and
+	 * handed to the thread pool as ScanJob via a cast. */
+	ScanJob scan_job;
+	/* Which slice of the split card-table scan this job handles
+	 * (0 .. sgen_workers_get_job_split_count () - 1). */
+	int job_index;
+} ParallelScanJob;
+
static ScanCopyContext
scan_copy_context_for_scan_job (void *worker_data_untyped, ScanJob *job)
{
static void
job_scan_major_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
{
- ScanJob *job_data = (ScanJob*)job;
- ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
+ ParallelScanJob *job_data = (ParallelScanJob*)job;
+ ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
- major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+ major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
}
static void
job_scan_los_mod_union_card_table (void *worker_data_untyped, SgenThreadPoolJob *job)
{
- ScanJob *job_data = (ScanJob*)job;
- ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
+ ParallelScanJob *job_data = (ParallelScanJob*)job;
+ ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
+
+ g_assert (concurrent_collection_in_progress);
+ sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
+}
+
+/*
+ * Worker job: preclean slice `job_index` (of the job split count) of the
+ * major heap's mod union card table during the concurrent phase.
+ */
+static void
+job_major_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+{
+	ParallelScanJob *job_data = (ParallelScanJob*)job;
+	ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
+
+	g_assert (concurrent_collection_in_progress);
+
+	major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
+}
+
+static void
+job_los_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+{
+ ParallelScanJob *job_data = (ParallelScanJob*)job;
+ ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, (ScanJob*)job_data);
g_assert (concurrent_collection_in_progress);
- sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION, ctx);
+
+ sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx, job_data->job_index, sgen_workers_get_job_split_count ());
}
static void
-job_mod_union_preclean (void *worker_data_untyped, SgenThreadPoolJob *job)
+job_scan_last_pinned (void *worker_data_untyped, SgenThreadPoolJob *job)
{
ScanJob *job_data = (ScanJob*)job;
ScanCopyContext ctx = scan_copy_context_for_scan_job (worker_data_untyped, job_data);
g_assert (concurrent_collection_in_progress);
- major_collector.scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
- sgen_los_scan_card_table (CARDTABLE_SCAN_MOD_UNION_PRECLEAN, ctx);
-
sgen_scan_pin_queue_objects (ctx);
}
+/*
+ * Callback run by the worker infrastructure: enqueues the parallel
+ * mod-union preclean jobs (major heap and LOS, one job per slice of the
+ * split count) plus a job that scans the last pinned objects.
+ */
+static void
+workers_finish_callback (void)
+{
+	ParallelScanJob *psj;
+	ScanJob *sj;
+	int split_count = sgen_workers_get_job_split_count ();
+	int i;
+	/* Mod union preclean jobs */
+	for (i = 0; i < split_count; i++) {
+		psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean major mod union cardtable", job_major_mod_union_preclean, sizeof (ParallelScanJob));
+		psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+		psj->scan_job.gc_thread_gray_queue = NULL;
+		psj->job_index = i;
+		sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+	}
+
+	/* LOS mod union preclean jobs, one per slice, same as above. */
+	for (i = 0; i < split_count; i++) {
+		psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("preclean los mod union cardtable", job_los_mod_union_preclean, sizeof (ParallelScanJob));
+		psj->scan_job.ops = sgen_workers_get_idle_func_object_ops ();
+		psj->scan_job.gc_thread_gray_queue = NULL;
+		psj->job_index = i;
+		sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+	}
+
+	/* Finally scan objects pinned since the last pin-queue scan. */
+	sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan last pinned", job_scan_last_pinned, sizeof (ScanJob));
+	sj->ops = sgen_workers_get_idle_func_object_ops ();
+	sj->gc_thread_gray_queue = NULL;
+	sgen_workers_enqueue_job (&sj->job, TRUE);
+}
+
+
static void
init_gray_queue (SgenGrayQueue *gc_thread_gray_queue, gboolean use_workers)
{
} CopyOrMarkFromRootsMode;
static void
-major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_next_pin_slot, CopyOrMarkFromRootsMode mode, SgenObjectOperations *object_ops)
+major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_next_pin_slot, CopyOrMarkFromRootsMode mode, SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par)
{
LOSObject *bigobj;
TV_DECLARE (atv);
*/
char *heap_start = NULL;
char *heap_end = (char*)-1;
- ScanCopyContext ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops, gc_thread_gray_queue);
+ ScanCopyContext ctx = CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, gc_thread_gray_queue);
gboolean concurrent = mode != COPY_OR_MARK_FROM_ROOTS_SERIAL;
SGEN_ASSERT (0, !!concurrent == !!concurrent_collection_in_progress, "We've been called with the wrong mode.");
}
sgen_los_pin_object (bigobj->data);
if (SGEN_OBJECT_HAS_REFERENCES (bigobj->data))
- GRAY_OBJECT_ENQUEUE (gc_thread_gray_queue, bigobj->data, sgen_obj_get_descriptor ((GCObject*)bigobj->data));
+ GRAY_OBJECT_ENQUEUE_SERIAL (gc_thread_gray_queue, bigobj->data, sgen_obj_get_descriptor ((GCObject*)bigobj->data));
sgen_pin_stats_register_object (bigobj->data, GENERATION_OLD);
SGEN_LOG (6, "Marked large object %p (%s) size: %lu from roots", bigobj->data,
sgen_client_vtable_get_name (SGEN_LOAD_VTABLE (bigobj->data)),
SGEN_ASSERT (0, sgen_workers_all_done (), "Why are the workers not done when we start or finish a major collection?");
if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
+ sgen_workers_set_num_active_workers (0);
if (sgen_workers_have_idle_work ()) {
/*
* We force the finish of the worker with the new object ops context
* which can also do copying. We need to have finished pinning.
*/
- sgen_workers_start_all_workers (object_ops, NULL);
+ sgen_workers_start_all_workers (object_ops_nopar, object_ops_par, NULL);
+
sgen_workers_join ();
}
}
sgen_client_collecting_major_3 (&fin_ready_queue, &critical_fin_queue);
- enqueue_scan_from_roots_jobs (gc_thread_gray_queue, heap_start, heap_end, object_ops, FALSE);
+ enqueue_scan_from_roots_jobs (gc_thread_gray_queue, heap_start, heap_end, object_ops_nopar, FALSE);
TV_GETTIME (btv);
time_major_scan_roots += TV_ELAPSED (atv, btv);
* the roots.
*/
if (mode == COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT) {
+ sgen_workers_set_num_active_workers (1);
+ gray_queue_redirect (gc_thread_gray_queue);
if (precleaning_enabled) {
- ScanJob *sj;
- /* Mod union preclean job */
- sj = (ScanJob*)sgen_thread_pool_job_alloc ("preclean mod union cardtable", job_mod_union_preclean, sizeof (ScanJob));
- sj->ops = object_ops;
- sj->gc_thread_gray_queue = NULL;
- sgen_workers_start_all_workers (object_ops, &sj->job);
+ sgen_workers_start_all_workers (object_ops_nopar, object_ops_par, workers_finish_callback);
} else {
- sgen_workers_start_all_workers (object_ops, NULL);
+ sgen_workers_start_all_workers (object_ops_nopar, object_ops_par, NULL);
}
- gray_queue_enable_redirect (gc_thread_gray_queue);
}
if (mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT) {
- ScanJob *sj;
+ int i, split_count = sgen_workers_get_job_split_count ();
+
+ gray_queue_redirect (gc_thread_gray_queue);
/* Mod union card table */
- sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan mod union cardtable", job_scan_major_mod_union_card_table, sizeof (ScanJob));
- sj->ops = object_ops;
- sj->gc_thread_gray_queue = gc_thread_gray_queue;
- sgen_workers_enqueue_job (&sj->job, FALSE);
-
- sj = (ScanJob*)sgen_thread_pool_job_alloc ("scan LOS mod union cardtable", job_scan_los_mod_union_card_table, sizeof (ScanJob));
- sj->ops = object_ops;
- sj->gc_thread_gray_queue = gc_thread_gray_queue;
- sgen_workers_enqueue_job (&sj->job, FALSE);
-
- TV_GETTIME (atv);
- time_major_scan_mod_union += TV_ELAPSED (btv, atv);
+ for (i = 0; i < split_count; i++) {
+ ParallelScanJob *psj;
+
+ psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan mod union cardtable", job_scan_major_mod_union_card_table, sizeof (ParallelScanJob));
+ psj->scan_job.ops = object_ops_par ? object_ops_par : object_ops_nopar;
+ psj->scan_job.gc_thread_gray_queue = NULL;
+ psj->job_index = i;
+ sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+
+ psj = (ParallelScanJob*)sgen_thread_pool_job_alloc ("scan LOS mod union cardtable", job_scan_los_mod_union_card_table, sizeof (ParallelScanJob));
+ psj->scan_job.ops = object_ops_par ? object_ops_par : object_ops_nopar;
+ psj->scan_job.gc_thread_gray_queue = NULL;
+ psj->job_index = i;
+ sgen_workers_enqueue_job (&psj->scan_job.job, TRUE);
+ }
+
+ /*
+ * If we enqueue a job while workers are running we need to sgen_workers_ensure_awake
+ * in order to make sure that we are running the idle func and draining all worker
+ * gray queues. The operation of starting workers implies this, so we start them after
+ * in order to avoid doing this operation twice. The workers will drain the main gray
+ * stack that contained roots and pinned objects and also scan the mod union card
+ * table.
+ */
+ sgen_workers_start_all_workers (object_ops_nopar, object_ops_par, NULL);
+ sgen_workers_join ();
}
sgen_pin_stats_report ();
static void
major_start_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason, gboolean concurrent, size_t *old_next_pin_slot)
{
- SgenObjectOperations *object_ops;
+ SgenObjectOperations *object_ops_nopar, *object_ops_par = NULL;
binary_protocol_collection_begin (gc_stats.major_gc_count, GENERATION_OLD);
g_assert (major_collector.is_concurrent);
concurrent_collection_in_progress = TRUE;
- object_ops = &major_collector.major_ops_concurrent_start;
+ object_ops_nopar = &major_collector.major_ops_concurrent_start;
+ if (major_collector.is_parallel)
+ object_ops_par = &major_collector.major_ops_conc_par_start;
+
} else {
- object_ops = &major_collector.major_ops_serial;
+ object_ops_nopar = &major_collector.major_ops_serial;
}
reset_pinned_from_failed_allocation ();
if (major_collector.start_major_collection)
major_collector.start_major_collection ();
- major_copy_or_mark_from_roots (gc_thread_gray_queue, old_next_pin_slot, concurrent ? COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT : COPY_OR_MARK_FROM_ROOTS_SERIAL, object_ops);
+ major_copy_or_mark_from_roots (gc_thread_gray_queue, old_next_pin_slot, concurrent ? COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT : COPY_OR_MARK_FROM_ROOTS_SERIAL, object_ops_nopar, object_ops_par);
}
static void
major_finish_collection (SgenGrayQueue *gc_thread_gray_queue, const char *reason, gboolean is_overflow, size_t old_next_pin_slot, gboolean forced)
{
ScannedObjectCounts counts;
- SgenObjectOperations *object_ops;
+ SgenObjectOperations *object_ops_nopar;
mword fragment_total;
TV_DECLARE (atv);
TV_DECLARE (btv);
TV_GETTIME (btv);
if (concurrent_collection_in_progress) {
- object_ops = &major_collector.major_ops_concurrent_finish;
+ SgenObjectOperations *object_ops_par = NULL;
- major_copy_or_mark_from_roots (gc_thread_gray_queue, NULL, COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT, object_ops);
+ object_ops_nopar = &major_collector.major_ops_concurrent_finish;
+ if (major_collector.is_parallel)
+ object_ops_par = &major_collector.major_ops_conc_par_finish;
+
+ major_copy_or_mark_from_roots (gc_thread_gray_queue, NULL, COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT, object_ops_nopar, object_ops_par);
#ifdef SGEN_DEBUG_INTERNAL_ALLOC
main_gc_thread = NULL;
#endif
} else {
- object_ops = &major_collector.major_ops_serial;
+ object_ops_nopar = &major_collector.major_ops_serial;
}
sgen_workers_assert_gray_queue_is_empty ();
- finish_gray_stack (GENERATION_OLD, CONTEXT_FROM_OBJECT_OPERATIONS (object_ops, gc_thread_gray_queue));
+ finish_gray_stack (GENERATION_OLD, CONTEXT_FROM_OBJECT_OPERATIONS (object_ops_nopar, gc_thread_gray_queue));
TV_GETTIME (atv);
time_major_finish_gray_stack += TV_ELAPSED (btv, atv);
sgen_marksweep_init (&major_collector);
} else if (!strcmp (major_collector_opt, "marksweep-conc")) {
sgen_marksweep_conc_init (&major_collector);
+ } else if (!strcmp (major_collector_opt, "marksweep-conc-par")) {
+ sgen_marksweep_conc_par_init (&major_collector);
} else {
sgen_env_var_error (MONO_GC_PARAMS_NAME, "Using `" DEFAULT_MAJOR_NAME "` instead.", "Unknown major collector `%s'.", major_collector_opt);
goto use_default_major;
if (major_collector.post_param_init)
major_collector.post_param_init (&major_collector);
- if (major_collector.needs_thread_pool)
- sgen_workers_init (1);
+ if (major_collector.needs_thread_pool) {
+ int num_workers = 1;
+ if (major_collector.is_parallel) {
+ /* FIXME Detect the number of physical cores, instead of logical */
+ num_workers = mono_cpu_count () / 2;
+ if (num_workers < 1)
+ num_workers = 1;
+ }
+ sgen_workers_init (num_workers, (SgenWorkerCallback) major_collector.worker_init_cb);
+ }
sgen_memgov_init (max_heap, soft_limit, debug_print_allowance, allowance_ratio, save_target);
#define SGEN_POINTER_UNTAG_VTABLE(p) SGEN_POINTER_UNTAG_ALL((p))
/* returns NULL if not forwarded, or the forwarded address */
-#define SGEN_VTABLE_IS_FORWARDED(vtable) ((GCVTable *)(SGEN_POINTER_IS_TAGGED_FORWARDED ((vtable)) ? SGEN_POINTER_UNTAG_VTABLE ((vtable)) : NULL))
+#define SGEN_VTABLE_IS_FORWARDED(vtable) ((GCObject *)(SGEN_POINTER_IS_TAGGED_FORWARDED ((vtable)) ? SGEN_POINTER_UNTAG_VTABLE ((vtable)) : NULL))
#define SGEN_OBJECT_IS_FORWARDED(obj) ((GCObject *)SGEN_VTABLE_IS_FORWARDED (((mword*)(obj))[0]))
#define SGEN_VTABLE_IS_PINNED(vtable) SGEN_POINTER_IS_TAGGED_PINNED ((vtable))
#define SGEN_FORWARD_OBJECT(obj,fw_addr) do { \
*(void**)(obj) = SGEN_POINTER_TAG_FORWARDED ((fw_addr)); \
} while (0)
+/*
+ * Parallel-safe forwarding: atomically installs `fw_addr` as the forwarding
+ * pointer of `obj` via CAS. If another thread forwarded the object first
+ * (either before we read the vtable word, or by winning the CAS),
+ * `final_fw_addr` is set to the winner's destination; otherwise it is set
+ * to `fw_addr`. Callers must treat `final_fw_addr` as the canonical copy.
+ * NOTE(review): the early `break` relies on do/while (0) expansion -- do
+ * not wrap uses in another loop construct.
+ */
+#define SGEN_FORWARD_OBJECT_PAR(obj,fw_addr,final_fw_addr) do {	\
+	gpointer old_vtable_word = *(gpointer*)obj;	\
+	gpointer new_vtable_word;	\
+	final_fw_addr = SGEN_VTABLE_IS_FORWARDED (old_vtable_word);	\
+	if (final_fw_addr)	\
+		break;	\
+	new_vtable_word = SGEN_POINTER_TAG_FORWARDED ((fw_addr));	\
+	old_vtable_word = InterlockedCompareExchangePointer ((gpointer*)obj, new_vtable_word, old_vtable_word);	\
+	final_fw_addr = SGEN_VTABLE_IS_FORWARDED (old_vtable_word);	\
+	if (!final_fw_addr)	\
+		final_fw_addr = (fw_addr);	\
+	} while (0)
#define SGEN_PIN_OBJECT(obj) do { \
*(void**)(obj) = SGEN_POINTER_TAG_PINNED (*(void**)(obj)); \
} while (0)
struct _SgenMajorCollector {
size_t section_size;
gboolean is_concurrent;
+ gboolean is_parallel;
gboolean needs_thread_pool;
gboolean supports_cardtable;
gboolean sweeps_lazily;
SgenObjectOperations major_ops_serial;
SgenObjectOperations major_ops_concurrent_start;
SgenObjectOperations major_ops_concurrent_finish;
+ SgenObjectOperations major_ops_conc_par_start;
+ SgenObjectOperations major_ops_conc_par_finish;
GCObject* (*alloc_object) (GCVTable vtable, size_t size, gboolean has_references);
+ GCObject* (*alloc_object_par) (GCVTable vtable, size_t size, gboolean has_references);
void (*free_pinned_object) (GCObject *obj, size_t size);
/*
void (*free_non_pinned_object) (GCObject *obj, size_t size);
void (*pin_objects) (SgenGrayQueue *queue);
void (*pin_major_object) (GCObject *obj, SgenGrayQueue *queue);
- void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx);
+ void (*scan_card_table) (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count);
void (*iterate_live_block_ranges) (sgen_cardtable_block_callback callback);
void (*iterate_block_ranges) (sgen_cardtable_block_callback callback);
void (*update_cardtable_mod_union) (void);
guint8* (*get_cardtable_mod_union_for_reference) (char *object);
long long (*get_and_reset_num_major_objects_marked) (void);
void (*count_cards) (long long *num_total_cards, long long *num_marked_cards);
+
+ void (*worker_init_cb) (gpointer worker);
};
extern SgenMajorCollector major_collector;
void sgen_marksweep_init (SgenMajorCollector *collector);
-void sgen_marksweep_fixed_init (SgenMajorCollector *collector);
-void sgen_marksweep_par_init (SgenMajorCollector *collector);
-void sgen_marksweep_fixed_par_init (SgenMajorCollector *collector);
void sgen_marksweep_conc_init (SgenMajorCollector *collector);
+void sgen_marksweep_conc_par_init (SgenMajorCollector *collector);
SgenMajorCollector* sgen_get_major_collector (void);
static inline mword
sgen_safe_object_get_size (GCObject *obj)
{
- GCObject *forwarded;
-
- if ((forwarded = SGEN_OBJECT_IS_FORWARDED (obj)))
- obj = forwarded;
+ GCObject *forwarded;
+ GCVTable vtable = SGEN_LOAD_VTABLE_UNCHECKED (obj);
- return sgen_client_par_object_get_size (SGEN_LOAD_VTABLE (obj), obj);
+	/*
+	 * Once we load the vtable, we must always use it, in case we are in the parallel
+	 * case. Otherwise the object might get forwarded in the meantime and we would
+	 * read an invalid vtable. An object cannot be forwarded a second time during
+	 * the same GC.
+	 */
+ if ((forwarded = SGEN_VTABLE_IS_FORWARDED (vtable)))
+ return sgen_client_par_object_get_size (SGEN_LOAD_VTABLE (forwarded), obj);
+ else
+ return sgen_client_par_object_get_size ((GCVTable)SGEN_POINTER_UNTAG_ALL (vtable), obj);
}
static inline gboolean
gboolean sgen_ptr_is_in_los (char *ptr, char **start);
void sgen_los_iterate_objects (IterateObjectCallbackFunc cb, void *user_data);
void sgen_los_iterate_live_block_ranges (sgen_cardtable_block_callback callback);
-void sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx);
+void sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count);
void sgen_los_update_cardtable_mod_union (void);
void sgen_los_count_cards (long long *num_total_cards, long long *num_marked_cards);
gboolean sgen_los_is_valid_object (char *object);
LOSObject* sgen_los_header_for_object (GCObject *data);
mword sgen_los_object_size (LOSObject *obj);
void sgen_los_pin_object (GCObject *obj);
+gboolean sgen_los_pin_object_par (GCObject *obj);
gboolean sgen_los_object_is_pinned (GCObject *obj);
void sgen_los_mark_mod_union_card (GCObject *mono_obj, void **ptr);
static GrayQueueSection *last_gray_queue_free_list;
void
-sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue)
+sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue, gboolean is_parallel)
{
GrayQueueSection *section;
- if (queue->alloc_prepare_func)
- queue->alloc_prepare_func (queue);
-
if (queue->free_list) {
/* Use the previously allocated queue sections if possible */
section = queue->free_list;
STATE_SET (section, GRAY_QUEUE_SECTION_STATE_FLOATING);
}
- section->size = SGEN_GRAY_QUEUE_SECTION_SIZE;
+ /* Section is empty */
+ section->size = 0;
STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
/* Link it with the others */
section->next = queue->first;
+ section->prev = NULL;
+ if (queue->first)
+ queue->first->prev = section;
+ else
+ queue->last = section;
queue->first = section;
queue->cursor = section->entries - 1;
+
+ if (is_parallel) {
+ mono_memory_write_barrier ();
+ /*
+ * FIXME
+ * we could probably optimize the code to only rely on the write barrier
+ * for synchronization with the stealer thread. Additionally we could also
+ * do a write barrier once every other gray queue change, and request
+ * to have a minimum of sections before stealing, to keep consistency.
+ */
+ InterlockedIncrement (&queue->num_sections);
+ } else {
+ queue->num_sections++;
+ }
}
void
*/
void
-sgen_gray_object_enqueue (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc)
+sgen_gray_object_enqueue (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc, gboolean is_parallel)
{
GrayQueueEntry entry = SGEN_GRAY_QUEUE_ENTRY (obj, desc);
if (G_UNLIKELY (!queue->first || queue->cursor == GRAY_LAST_CURSOR_POSITION (queue->first))) {
if (queue->first) {
- /* Set the current section size back to default, might have been changed by sgen_gray_object_dequeue_section */
+			/*
+			 * We don't actively update the section size with each push/pop. For the first
+			 * section we determine the size from the cursor position. For the rest of the
+			 * sections we need to have the size set.
+			 */
queue->first->size = SGEN_GRAY_QUEUE_SECTION_SIZE;
}
- sgen_gray_object_alloc_queue_section (queue);
+ sgen_gray_object_alloc_queue_section (queue, is_parallel);
}
STATE_ASSERT (queue->first, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
SGEN_ASSERT (9, queue->cursor <= GRAY_LAST_CURSOR_POSITION (queue->first), "gray queue %p overflow, first %p, cursor %p", queue, queue->first, queue->cursor);
#endif
}
+/*
+ * We attempt to spread the objects in the gray queue across a number
+ * of sections. If the queue has more sections, then it's already spread,
+ * if it doesn't have enough sections, then we allocate as many as we
+ * can.
+ */
+void
+sgen_gray_object_spread (SgenGrayQueue *queue, int num_sections)
+{
+	GrayQueueSection *section_start, *section_end;
+	int total_entries = 0, num_entries_per_section;
+	int num_sections_final;
+
+	/* Already spread across enough sections; nothing to do. */
+	if (queue->num_sections >= num_sections)
+		return;
+
+	/* Empty queue; nothing to spread. */
+	if (!queue->first)
+		return;
+
+	/* Compute number of elements in the gray queue */
+	queue->first->size = queue->cursor - queue->first->entries + 1;
+	total_entries = queue->first->size;
+	for (section_start = queue->first->next; section_start != NULL; section_start = section_start->next) {
+		SGEN_ASSERT (0, section_start->size == SGEN_GRAY_QUEUE_SECTION_SIZE, "We expect all section aside from the first one to be full");
+		total_entries += section_start->size;
+	}
+
+	/* Compute how many sections we should have and elements per section */
+	num_sections_final = (total_entries > num_sections) ? num_sections : total_entries;
+	num_entries_per_section = total_entries / num_sections_final;
+
+	/* Allocate all needed sections */
+	/* New sections are prepended empty (size 0) at the head of the queue. */
+	while (queue->num_sections < num_sections_final)
+		sgen_gray_object_alloc_queue_section (queue, TRUE);
+
+	/* Spread out the elements in the sections. By design, sections at the end are fuller. */
+	section_start = queue->first;
+	section_end = queue->last;
+	while (section_start != section_end) {
+		/* We move entries from end to start, until they meet */
+		while (section_start->size < num_entries_per_section) {
+			GrayQueueEntry entry;
+			if (section_end->size <= num_entries_per_section) {
+				/* Tail section drained to target level; move toward the front. */
+				section_end = section_end->prev;
+				if (section_end == section_start)
+					break;
+			}
+			if (section_end->size <= num_entries_per_section)
+				break;
+
+			section_end->size--;
+			entry = section_end->entries [section_end->size];
+			section_start->entries [section_start->size] = entry;
+			section_start->size++;
+		}
+		section_start = section_start->next;
+	}
+
+	/* Cursor points at the last valid entry of the (possibly partial) first section. */
+	queue->cursor = queue->first->entries + queue->first->size - 1;
+	queue->num_sections = num_sections_final;
+}
+
GrayQueueEntry
-sgen_gray_object_dequeue (SgenGrayQueue *queue)
+sgen_gray_object_dequeue (SgenGrayQueue *queue, gboolean is_parallel)
{
GrayQueueEntry entry;
#endif
if (G_UNLIKELY (queue->cursor < GRAY_FIRST_CURSOR_POSITION (queue->first))) {
- GrayQueueSection *section = queue->first;
+ GrayQueueSection *section;
+ gint32 old_num_sections = 0;
+
+ if (is_parallel)
+ old_num_sections = InterlockedDecrement (&queue->num_sections);
+ else
+ queue->num_sections--;
+
+ if (is_parallel && old_num_sections <= 0) {
+ mono_os_mutex_lock (&queue->steal_mutex);
+ }
+
+ section = queue->first;
queue->first = section->next;
+ if (queue->first) {
+ queue->first->prev = NULL;
+ } else {
+ queue->last = NULL;
+ SGEN_ASSERT (0, !old_num_sections, "Why do we have an inconsistent number of sections ?");
+ }
section->next = queue->free_list;
STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_ENQUEUED, GRAY_QUEUE_SECTION_STATE_FREE_LIST);
queue->free_list = section;
queue->cursor = queue->first ? queue->first->entries + queue->first->size - 1 : NULL;
+
+ if (is_parallel && old_num_sections <= 0) {
+ mono_os_mutex_unlock (&queue->steal_mutex);
+ }
}
return entry;
if (!queue->first)
return NULL;
+ /* We never steal from this queue */
+ queue->num_sections--;
+
section = queue->first;
queue->first = section->next;
+ if (queue->first)
+ queue->first->prev = NULL;
+ else
+ queue->last = NULL;
section->next = NULL;
section->size = queue->cursor - section->entries + 1;
return section;
}
+GrayQueueSection*
+sgen_gray_object_steal_section (SgenGrayQueue *queue)
+{
+ gint32 sections_remaining;
+ GrayQueueSection *section = NULL;
+
+ /*
+ * With each section enqueued we increment num_sections; each dequeue or
+ * steal decrements it. There is only one thread accessing the top (the
+ * owner) and potentially multiple workers trying to steal sections from
+ * the bottom, so we need to lock. A num_sections decrement from the owner
+ * means that the first section is reserved, while a decrement by the
+ * stealer means that the last section is reserved. If, after we decrement
+ * num_sections, we have at least one more section present, it means we
+ * can't race with the other thread. If this is not the case, the steal
+ * end abandons the pop, restoring num_sections, while the owner end takes
+ * a lock to make sure it is not racing with the stealer (since the stealer
+ * might have popped an entry and be in the process of updating the entry
+ * that the owner is trying to pop).
+ */
+
+ if (queue->num_sections <= 1)
+ return NULL;
+
+ /* Give up if there is contention on the last section */
+ if (mono_os_mutex_trylock (&queue->steal_mutex) != 0)
+ return NULL;
+
+ sections_remaining = InterlockedDecrement (&queue->num_sections);
+ if (sections_remaining <= 0) {
+ /* The section that we tried to steal might be the head of the queue. */
+ InterlockedIncrement (&queue->num_sections);
+ } else {
+ /* We have reserved for us the tail section of the queue */
+ section = queue->last;
+ SGEN_ASSERT (0, section, "Why we don't have any sections to steal?");
+ SGEN_ASSERT (0, !section->next, "Why aren't we stealing the tail?");
+ queue->last = section->prev;
+ section->prev = NULL;
+ SGEN_ASSERT (0, queue->last, "Why are we stealing the last section?");
+ queue->last->next = NULL;
+
+ STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_ENQUEUED, GRAY_QUEUE_SECTION_STATE_FLOATING);
+ }
+
+ mono_os_mutex_unlock (&queue->steal_mutex);
+ return section;
+}
+
void
-sgen_gray_object_enqueue_section (SgenGrayQueue *queue, GrayQueueSection *section)
+sgen_gray_object_enqueue_section (SgenGrayQueue *queue, GrayQueueSection *section, gboolean is_parallel)
{
STATE_TRANSITION (section, GRAY_QUEUE_SECTION_STATE_FLOATING, GRAY_QUEUE_SECTION_STATE_ENQUEUED);
queue->first->size = queue->cursor - queue->first->entries + 1;
section->next = queue->first;
+ section->prev = NULL;
+ if (queue->first)
+ queue->first->prev = section;
+ else
+ queue->last = section;
queue->first = section;
queue->cursor = queue->first->entries + queue->first->size - 1;
#ifdef SGEN_CHECK_GRAY_OBJECT_ENQUEUE
queue->enqueue_check_func (section->entries [i].obj);
}
#endif
+ if (is_parallel) {
+ mono_memory_write_barrier ();
+ InterlockedIncrement (&queue->num_sections);
+ } else {
+ queue->num_sections++;
+ }
}
void
queue->enqueue_check_func = enqueue_check_func;
#endif
+ mono_os_mutex_init (&queue->steal_mutex);
+
if (reuse_free_list) {
queue->free_list = last_gray_queue_free_list;
last_gray_queue_free_list = NULL;
memset (queue, 0, sizeof (SgenGrayQueue));
}
-void
-sgen_gray_queue_set_alloc_prepare (SgenGrayQueue *queue, GrayQueueAllocPrepareFunc alloc_prepare_func)
-{
- SGEN_ASSERT (0, !queue->alloc_prepare_func, "Can't set gray queue alloc-prepare twice");
- queue->alloc_prepare_func = alloc_prepare_func;
-}
-
void
sgen_gray_object_queue_deinit (SgenGrayQueue *queue)
{
/* SGEN_GRAY_QUEUE_HEADER_SIZE is number of machine words */
#ifdef SGEN_CHECK_GRAY_OBJECT_SECTIONS
-#define SGEN_GRAY_QUEUE_HEADER_SIZE 4
+#define SGEN_GRAY_QUEUE_HEADER_SIZE 5
#else
-#define SGEN_GRAY_QUEUE_HEADER_SIZE 2
+#define SGEN_GRAY_QUEUE_HEADER_SIZE 3
#endif
#define SGEN_GRAY_QUEUE_SECTION_SIZE (128 - SGEN_GRAY_QUEUE_HEADER_SIZE)
#define SGEN_GRAY_QUEUE_ENTRY(obj,desc) { (obj), (desc) }
+#define GRAY_OBJECT_ENQUEUE_SERIAL(queue, obj, desc) (GRAY_OBJECT_ENQUEUE (queue, obj, desc, FALSE))
+#define GRAY_OBJECT_ENQUEUE_PARALLEL(queue, obj, desc) (GRAY_OBJECT_ENQUEUE (queue, obj, desc, TRUE))
+#define GRAY_OBJECT_DEQUEUE_SERIAL(queue, obj, desc) (GRAY_OBJECT_DEQUEUE (queue, obj, desc, FALSE))
+#define GRAY_OBJECT_DEQUEUE_PARALLEL(queue, obj, desc) (GRAY_OBJECT_DEQUEUE (queue, obj, desc, TRUE))
+
/*
* This is a stack now instead of a queue, so the most recently added items are removed
* first, improving cache locality, and keeping the stack size manageable.
GrayQueueSectionState state;
#endif
int size;
- GrayQueueSection *next;
+ GrayQueueSection *next, *prev;
GrayQueueEntry entries [SGEN_GRAY_QUEUE_SECTION_SIZE];
};
struct _SgenGrayQueue {
GrayQueueEntry *cursor;
- GrayQueueSection *first;
+ GrayQueueSection *first, *last;
GrayQueueSection *free_list;
- GrayQueueAllocPrepareFunc alloc_prepare_func;
+ mono_mutex_t steal_mutex;
+ gint32 num_sections;
#ifdef SGEN_CHECK_GRAY_OBJECT_ENQUEUE
GrayQueueEnqueueCheckFunc enqueue_check_func;
#endif
void sgen_init_gray_queues (void);
-void sgen_gray_object_enqueue (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc);
-GrayQueueEntry sgen_gray_object_dequeue (SgenGrayQueue *queue);
+void sgen_gray_object_enqueue (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc, gboolean is_parallel);
+GrayQueueEntry sgen_gray_object_dequeue (SgenGrayQueue *queue, gboolean is_parallel);
GrayQueueSection* sgen_gray_object_dequeue_section (SgenGrayQueue *queue);
-void sgen_gray_object_enqueue_section (SgenGrayQueue *queue, GrayQueueSection *section);
+GrayQueueSection* sgen_gray_object_steal_section (SgenGrayQueue *queue);
+void sgen_gray_object_spread (SgenGrayQueue *queue, int num_sections);
+void sgen_gray_object_enqueue_section (SgenGrayQueue *queue, GrayQueueSection *section, gboolean is_parallel);
void sgen_gray_object_queue_trim_free_list (SgenGrayQueue *queue);
void sgen_gray_object_queue_init (SgenGrayQueue *queue, GrayQueueEnqueueCheckFunc enqueue_check_func, gboolean reuse_free_list);
void sgen_gray_object_queue_dispose (SgenGrayQueue *queue);
-void sgen_gray_queue_set_alloc_prepare (SgenGrayQueue *queue, GrayQueueAllocPrepareFunc alloc_prepare_func);
void sgen_gray_object_queue_deinit (SgenGrayQueue *queue);
-void sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue);
+void sgen_gray_object_alloc_queue_section (SgenGrayQueue *queue, gboolean is_parallel);
void sgen_gray_object_free_queue_section (GrayQueueSection *section);
void sgen_section_gray_queue_init (SgenSectionGrayQueue *queue, gboolean locked,
}
static inline MONO_ALWAYS_INLINE void
-GRAY_OBJECT_ENQUEUE (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc)
+GRAY_OBJECT_ENQUEUE (SgenGrayQueue *queue, GCObject *obj, SgenDescriptor desc, gboolean is_parallel)
{
#if SGEN_MAX_DEBUG_LEVEL >= 9
- sgen_gray_object_enqueue (queue, obj, desc);
+ sgen_gray_object_enqueue (queue, obj, desc, is_parallel);
#else
if (G_UNLIKELY (!queue->first || queue->cursor == GRAY_LAST_CURSOR_POSITION (queue->first))) {
- sgen_gray_object_enqueue (queue, obj, desc);
+ sgen_gray_object_enqueue (queue, obj, desc, is_parallel);
} else {
GrayQueueEntry entry = SGEN_GRAY_QUEUE_ENTRY (obj, desc);
}
static inline MONO_ALWAYS_INLINE void
-GRAY_OBJECT_DEQUEUE (SgenGrayQueue *queue, GCObject** obj, SgenDescriptor *desc)
+GRAY_OBJECT_DEQUEUE (SgenGrayQueue *queue, GCObject** obj, SgenDescriptor *desc, gboolean is_parallel)
{
GrayQueueEntry entry;
#if SGEN_MAX_DEBUG_LEVEL >= 9
- entry = sgen_gray_object_dequeue (queue);
+ entry = sgen_gray_object_dequeue (queue, is_parallel);
*obj = entry.obj;
*desc = entry.desc;
#else
*obj = NULL;
} else if (G_UNLIKELY (queue->cursor == GRAY_FIRST_CURSOR_POSITION (queue->first))) {
- entry = sgen_gray_object_dequeue (queue);
+ entry = sgen_gray_object_dequeue (queue, is_parallel);
*obj = entry.obj;
*desc = entry.desc;
} else {
}
void
-sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
+sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
{
LOSObject *obj;
+ int i = 0;
binary_protocol_los_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
- for (obj = los_object_list; obj; obj = obj->next) {
+ for (obj = los_object_list; obj; obj = obj->next, i++) {
mword num_cards = 0;
guint8 *cards;
+ if (i % job_split_count != job_index)
+ continue;
+
if (!SGEN_OBJECT_HAS_REFERENCES (obj->data))
continue;
if (!sgen_los_object_is_pinned (obj->data))
continue;
+ if (!obj->cardtable_mod_union)
+ continue;
+
cards = get_cardtable_mod_union_for_object (obj);
g_assert (cards);
if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN) {
binary_protocol_pin (data, (gpointer)SGEN_LOAD_VTABLE (data), sgen_safe_object_get_size (data));
}
+gboolean
+sgen_los_pin_object_par (GCObject *data)
+{
+ LOSObject *obj = sgen_los_header_for_object (data);
+ mword old_size = obj->size;
+ if (old_size & 1)
+ return FALSE;
+#if SIZEOF_VOID_P == 4
+ old_size = InterlockedCompareExchange ((volatile gint32*)&obj->size, old_size | 1, old_size);
+#else
+ old_size = InterlockedCompareExchange64 ((volatile gint64*)&obj->size, old_size | 1, old_size);
+#endif
+ if (old_size & 1)
+ return FALSE;
+ binary_protocol_pin (data, (gpointer)SGEN_LOAD_VTABLE (data), sgen_safe_object_get_size (data));
+ return TRUE;
+}
+
static void
sgen_los_unpin_object (GCObject *data)
{
if (sgen_ptr_in_nursery (obj)) {
#if !defined(COPY_OR_MARK_CONCURRENT) && !defined(COPY_OR_MARK_CONCURRENT_WITH_EVACUATION)
int word, bit;
+ gboolean first = TRUE;
GCObject *forwarded, *old_obj;
mword vtable_word = *(mword*)obj;
do_copy_object:
#endif
old_obj = obj;
+#ifdef COPY_OR_MARK_PARALLEL
+ obj = copy_object_no_checks_par (obj, queue);
+#else
obj = copy_object_no_checks (obj, queue);
+#endif
if (G_UNLIKELY (old_obj == obj)) {
/*
* If we fail to evacuate an object we just stop doing it for a
block = MS_BLOCK_FOR_OBJ (obj);
MS_CALC_MARK_BIT (word, bit, obj);
SGEN_ASSERT (9, !MS_MARK_BIT (block, word, bit), "object %p already marked", obj);
+#ifdef COPY_OR_MARK_PARALLEL
+ MS_SET_MARK_BIT_PAR (block, word, bit, first);
+#else
MS_SET_MARK_BIT (block, word, bit);
- binary_protocol_mark (obj, (gpointer)SGEN_LOAD_VTABLE (obj), sgen_safe_object_get_size (obj));
+#endif
+ if (first)
+ binary_protocol_mark (obj, (gpointer)SGEN_LOAD_VTABLE (obj), sgen_safe_object_get_size (obj));
return FALSE;
#endif
}
#endif
+#ifdef COPY_OR_MARK_PARALLEL
+ MS_MARK_OBJECT_AND_ENQUEUE_PAR (obj, desc, block, queue);
+#else
MS_MARK_OBJECT_AND_ENQUEUE (obj, desc, block, queue);
+#endif
} else {
+ gboolean first = TRUE;
HEAVY_STAT (++stat_optimized_copy_major_large);
-
+#ifdef COPY_OR_MARK_PARALLEL
+ first = sgen_los_pin_object_par (obj);
+#else
if (sgen_los_object_is_pinned (obj))
- return FALSE;
- binary_protocol_pin (obj, (gpointer)SGEN_LOAD_VTABLE (obj), sgen_safe_object_get_size (obj));
+ first = FALSE;
+ else
+ sgen_los_pin_object (obj);
+#endif
- sgen_los_pin_object (obj);
- if (SGEN_OBJECT_HAS_REFERENCES (obj))
- GRAY_OBJECT_ENQUEUE (queue, obj, desc);
+ if (first) {
+ binary_protocol_pin (obj, (gpointer)SGEN_LOAD_VTABLE (obj), sgen_safe_object_get_size (obj));
+ if (SGEN_OBJECT_HAS_REFERENCES (obj))
+#ifdef COPY_OR_MARK_PARALLEL
+ GRAY_OBJECT_ENQUEUE_PARALLEL (queue, obj, desc);
+#else
+ GRAY_OBJECT_ENQUEUE_SERIAL (queue, obj, desc);
+#endif
+ }
}
return FALSE;
}
static gboolean
DRAIN_GRAY_STACK_FUNCTION_NAME (SgenGrayQueue *queue)
{
-#if defined(COPY_OR_MARK_CONCURRENT) || defined(COPY_OR_MARK_CONCURRENT_WITH_EVACUATION)
+#if defined(COPY_OR_MARK_CONCURRENT) || defined(COPY_OR_MARK_CONCURRENT_WITH_EVACUATION) || defined(COPY_OR_MARK_PARALLEL)
int i;
for (i = 0; i < 32; i++) {
#else
HEAVY_STAT (++stat_drain_loops);
- GRAY_OBJECT_DEQUEUE (queue, &obj, &desc);
+#if defined(COPY_OR_MARK_PARALLEL)
+ GRAY_OBJECT_DEQUEUE_PARALLEL (queue, &obj, &desc);
+#else
+ GRAY_OBJECT_DEQUEUE_SERIAL (queue, &obj, &desc);
+#endif
if (!obj)
return TRUE;
return FALSE;
}
+#undef COPY_OR_MARK_PARALLEL
#undef COPY_OR_MARK_FUNCTION_NAME
#undef COPY_OR_MARK_WITH_EVACUATION
#undef COPY_OR_MARK_CONCURRENT
#define MS_BLOCK_FREE (MS_BLOCK_SIZE - MS_BLOCK_SKIP)
-#define MS_NUM_MARK_WORDS ((MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (mword) * 8 - 1) / (sizeof (mword) * 8))
+#define MS_NUM_MARK_WORDS (MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8)
/*
* Blocks progress from one state to the next:
void ** volatile free_list;
MSBlockInfo * volatile next_free;
guint8 * volatile cardtable_mod_union;
- mword mark_words [MS_NUM_MARK_WORDS];
+ guint32 mark_words [MS_NUM_MARK_WORDS];
};
#define MS_BLOCK_FOR_BLOCK_INFO(b) ((char*)(b))
//casting to int is fine since blocks are 32k
#define MS_CALC_MARK_BIT(w,b,o) do { \
int i = ((int)((char*)(o) - MS_BLOCK_DATA_FOR_OBJ ((o)))) >> SGEN_ALLOC_ALIGN_BITS; \
- if (sizeof (mword) == 4) { \
- (w) = i >> 5; \
- (b) = i & 31; \
- } else { \
- (w) = i >> 6; \
- (b) = i & 63; \
- } \
+ (w) = i >> 5; \
+ (b) = i & 31; \
} while (0)
#define MS_MARK_BIT(bl,w,b) ((bl)->mark_words [(w)] & (ONE_P << (b)))
#define MS_SET_MARK_BIT(bl,w,b) ((bl)->mark_words [(w)] |= (ONE_P << (b)))
+#define MS_SET_MARK_BIT_PAR(bl,w,b,first) do { \
+ guint32 tmp_mark_word = (bl)->mark_words [(w)]; \
+ guint32 old_mark_word; \
+ first = FALSE; \
+ while (!(tmp_mark_word & (ONE_P << (b)))) { \
+ old_mark_word = tmp_mark_word; \
+ tmp_mark_word = InterlockedCompareExchange ((volatile gint32*)&(bl)->mark_words [w], old_mark_word | (ONE_P << (b)), old_mark_word); \
+ if (tmp_mark_word == old_mark_word) { \
+ first = TRUE; \
+ break; \
+ } \
+ } \
+ } while (0)
+
#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE))
#define BLOCK_TAG(bl) ((bl)->has_references ? BLOCK_TAG_HAS_REFERENCES ((bl)) : (bl))
/* all allocated blocks in the system */
-static SgenArrayList allocated_blocks = SGEN_ARRAY_LIST_INIT (NULL, NULL, NULL, INTERNAL_MEM_PIN_QUEUE);
+static SgenArrayList allocated_blocks = SGEN_ARRAY_LIST_INIT (NULL, sgen_array_list_default_is_slot_set, sgen_array_list_default_cas_setter, INTERNAL_MEM_PIN_QUEUE);
/* non-allocated block free-list */
static void *empty_blocks = NULL;
* thread only ever adds blocks to the free list, so the ABA problem can't occur.
*/
static MSBlockInfo * volatile *free_block_lists [MS_BLOCK_TYPE_MAX];
+static MonoNativeTlsKey worker_block_free_list_key;
static guint64 stat_major_blocks_alloced = 0;
static guint64 stat_major_blocks_freed = 0;
#define FREE_BLOCKS_FROM(lists,p,r) (lists [((p) ? MS_BLOCK_FLAG_PINNED : 0) | ((r) ? MS_BLOCK_FLAG_REFS : 0)])
#define FREE_BLOCKS(p,r) (FREE_BLOCKS_FROM (free_block_lists, (p), (r)))
+#define FREE_BLOCKS_LOCAL(p,r) (FREE_BLOCKS_FROM (((MSBlockInfo***)mono_native_tls_get_value (worker_block_free_list_key)), (p), (r)))
#define MS_BLOCK_OBJ_SIZE_INDEX(s) \
(((s)+7)>>3 < MS_NUM_FAST_BLOCK_OBJ_SIZE_INDEXES ? \
return alloc_obj (vtable, size, FALSE, has_references);
}
+/*
+ * This can only be called by sgen workers. While this is called we assume
+ * that no other thread is accessing the block free lists. The world should
+ * be stopped and the gc thread should be waiting for workers to finish.
+ */
+static GCObject*
+major_alloc_object_par (GCVTable vtable, size_t size, gboolean has_references)
+{
+ int size_index = MS_BLOCK_OBJ_SIZE_INDEX (size);
+ MSBlockInfo * volatile * free_blocks = FREE_BLOCKS (FALSE, has_references);
+ MSBlockInfo **free_blocks_local = FREE_BLOCKS_LOCAL (FALSE, has_references);
+ void *obj;
+
+ if (free_blocks_local [size_index]) {
+get_slot:
+ obj = unlink_slot_from_free_list_uncontested (free_blocks_local, size_index);
+ } else {
+ MSBlockInfo *block;
+get_block:
+ block = free_blocks [size_index];
+ if (!block) {
+ if (G_UNLIKELY (!ms_alloc_block (size_index, FALSE, has_references)))
+ return NULL;
+ goto get_block;
+ } else {
+ MSBlockInfo *next_free = block->next_free;
+ /*
+ * Once a block is removed from the main list, it cannot return to the list until
+ * all the workers are finished and sweep is starting. This means we don't need
+ * to account for ABA problems.
+ */
+ if (SGEN_CAS_PTR ((volatile gpointer *)&free_blocks [size_index], next_free, block) != block)
+ goto get_block;
+ g_assert (block->free_list);
+ block->next_free = free_blocks_local [size_index];
+ free_blocks_local [size_index] = block;
+
+ goto get_slot;
+ }
+ }
+
+ /* FIXME: assumes object layout */
+ *(GCVTable*)obj = vtable;
+
+ /* FIXME is it worth CAS-ing here */
+ total_allocated_major += block_obj_sizes [size_index];
+
+ return (GCObject *)obj;
+}
+
/*
* We're not freeing the block if it's empty. We leave that work for
* the next major collection.
if (!MS_MARK_BIT ((block), __word, __bit)) { \
MS_SET_MARK_BIT ((block), __word, __bit); \
if (sgen_gc_descr_has_references (desc)) \
- GRAY_OBJECT_ENQUEUE ((queue), (obj), (desc)); \
+ GRAY_OBJECT_ENQUEUE_SERIAL ((queue), (obj), (desc)); \
+ binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
+ INC_NUM_MAJOR_OBJECTS_MARKED (); \
+ } \
+ } while (0)
+#define MS_MARK_OBJECT_AND_ENQUEUE_PAR(obj,desc,block,queue) do { \
+ int __word, __bit; \
+ gboolean first; \
+ MS_CALC_MARK_BIT (__word, __bit, (obj)); \
+ SGEN_ASSERT (9, MS_OBJ_ALLOCED ((obj), (block)), "object %p not allocated", obj); \
+ MS_SET_MARK_BIT_PAR ((block), __word, __bit, first); \
+ if (first) { \
+ if (sgen_gc_descr_has_references (desc)) \
+ GRAY_OBJECT_ENQUEUE_PARALLEL ((queue), (obj), (desc)); \
binary_protocol_mark ((obj), (gpointer)SGEN_LOAD_VTABLE ((obj)), sgen_safe_object_get_size ((obj))); \
INC_NUM_MAJOR_OBJECTS_MARKED (); \
} \
} while (0)
+
+
static void
pin_major_object (GCObject *obj, SgenGrayQueue *queue)
{
MS_MARK_OBJECT_AND_ENQUEUE (obj, sgen_obj_get_descriptor (obj), block, queue);
}
+#define COPY_OR_MARK_PARALLEL
#include "sgen-major-copy-object.h"
static long long
#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_no_evacuation
#include "sgen-marksweep-drain-gray-stack.h"
+#define COPY_OR_MARK_PARALLEL
+#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_par_no_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_par_no_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_par_no_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
+
#define COPY_OR_MARK_WITH_EVACUATION
#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_with_evacuation
#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_with_evacuation
#define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_with_evacuation
#include "sgen-marksweep-drain-gray-stack.h"
+#define COPY_OR_MARK_PARALLEL
+#define COPY_OR_MARK_WITH_EVACUATION
+#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_par_with_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_par_with_evacuation
+#define SCAN_VTYPE_FUNCTION_NAME major_scan_vtype_par_with_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_par_with_evacuation
+#define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_par_with_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
+
#define COPY_OR_MARK_CONCURRENT
#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_concurrent_no_evacuation
#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_concurrent_no_evacuation
#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_concurrent_no_evacuation
#include "sgen-marksweep-drain-gray-stack.h"
+#define COPY_OR_MARK_PARALLEL
+#define COPY_OR_MARK_CONCURRENT
+#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_concurrent_par_no_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_concurrent_par_no_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_concurrent_par_no_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
+
#define COPY_OR_MARK_CONCURRENT_WITH_EVACUATION
#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_concurrent_with_evacuation
#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_concurrent_with_evacuation
#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_concurrent_with_evacuation
#include "sgen-marksweep-drain-gray-stack.h"
+#define COPY_OR_MARK_PARALLEL
+#define COPY_OR_MARK_CONCURRENT_WITH_EVACUATION
+#define COPY_OR_MARK_FUNCTION_NAME major_copy_or_mark_object_concurrent_par_with_evacuation
+#define SCAN_OBJECT_FUNCTION_NAME major_scan_object_concurrent_par_with_evacuation
+#define SCAN_VTYPE_FUNCTION_NAME major_scan_vtype_concurrent_par_with_evacuation
+#define SCAN_PTR_FIELD_FUNCTION_NAME major_scan_ptr_field_concurrent_par_with_evacuation
+#define DRAIN_GRAY_STACK_FUNCTION_NAME drain_gray_stack_concurrent_par_with_evacuation
+#include "sgen-marksweep-drain-gray-stack.h"
+
static inline gboolean
major_is_evacuating (void)
{
return drain_gray_stack_no_evacuation (queue);
}
+static gboolean
+drain_gray_stack_par (SgenGrayQueue *queue)
+{
+ if (major_is_evacuating ())
+ return drain_gray_stack_par_with_evacuation (queue);
+ else
+ return drain_gray_stack_par_no_evacuation (queue);
+}
+
static gboolean
drain_gray_stack_concurrent (SgenGrayQueue *queue)
{
return drain_gray_stack_concurrent_no_evacuation (queue);
}
+static gboolean
+drain_gray_stack_concurrent_par (SgenGrayQueue *queue)
+{
+ if (major_is_evacuating ())
+ return drain_gray_stack_concurrent_par_with_evacuation (queue);
+ else
+ return drain_gray_stack_concurrent_par_no_evacuation (queue);
+}
+
static void
major_copy_or_mark_object_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
major_copy_or_mark_object_concurrent_with_evacuation (ptr, *ptr, queue);
}
+static void
+major_copy_or_mark_object_concurrent_par_canonical (GCObject **ptr, SgenGrayQueue *queue)
+{
+ major_copy_or_mark_object_concurrent_par_with_evacuation (ptr, *ptr, queue);
+}
+
static void
major_copy_or_mark_object_concurrent_finish_canonical (GCObject **ptr, SgenGrayQueue *queue)
{
major_copy_or_mark_object_with_evacuation (ptr, *ptr, queue);
}
+static void
+major_copy_or_mark_object_concurrent_par_finish_canonical (GCObject **ptr, SgenGrayQueue *queue)
+{
+ major_copy_or_mark_object_par_with_evacuation (ptr, *ptr, queue);
+}
+
static void
mark_pinned_objects_in_block (MSBlockInfo *block, size_t first_entry, size_t last_entry, SgenGrayQueue *queue)
{
}
/* reset mark bits */
- memset (block->mark_words, 0, sizeof (mword) * MS_NUM_MARK_WORDS);
+ memset (block->mark_words, 0, sizeof (guint32) * MS_NUM_MARK_WORDS);
/* Reverse free list so that it's in address order */
reversed = NULL;
static volatile size_t num_major_sections_before_sweep;
static volatile size_t num_major_sections_freed_in_sweep;
+static void
+sgen_worker_clear_free_block_lists (WorkerData *worker)
+{
+ int i, j;
+
+ if (!worker->free_block_lists)
+ return;
+
+ for (i = 0; i < MS_BLOCK_TYPE_MAX; i++) {
+ for (j = 0; j < num_block_obj_sizes; j++) {
+ ((MSBlockInfo***) worker->free_block_lists) [i][j] = NULL;
+ }
+ }
+}
+
static void
sweep_start (void)
{
free_blocks [j] = NULL;
}
- sgen_array_list_remove_nulls (&allocated_blocks);
+ sgen_workers_foreach (sgen_worker_clear_free_block_lists);
}
static void sweep_finish (void);
ms_free_block (block);
SGEN_ATOMIC_ADD_P (num_major_sections, -1);
+ SGEN_ATOMIC_ADD_P (num_major_sections_freed_in_sweep, 1);
tagged_block = NULL;
}
* cooperate with the sweep thread to finish sweeping, and they will traverse from
* low to high, to avoid constantly colliding on the same blocks.
*/
- for (block_index = num_blocks; block_index-- > 0;) {
- /*
- * The block might have been freed by another thread doing some checking
- * work.
- */
- if (!ensure_block_is_checked_for_sweeping (block_index, TRUE, NULL))
- ++num_major_sections_freed_in_sweep;
+ for (block_index = allocated_blocks.next_slot; block_index-- > 0;) {
+ ensure_block_is_checked_for_sweeping (block_index, TRUE, NULL);
}
while (!try_set_sweep_state (SWEEP_STATE_COMPACTING, SWEEP_STATE_SWEEPING)) {
sweep_start ();
- SGEN_ASSERT (0, num_major_sections == allocated_blocks.next_slot, "We don't know how many blocks we have?");
-
num_major_sections_before_sweep = num_major_sections;
num_major_sections_freed_in_sweep = 0;
}
static void
-major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx)
+major_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
{
MSBlockInfo *block;
gboolean has_references, was_sweeping, skip_scan;
binary_protocol_major_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) {
+ if (__index % job_split_count != job_index)
+ continue;
#ifdef PREFETCH_CARDS
- int prefetch_index = __index + 6;
+ int prefetch_index = __index + 6 * job_split_count;
if (prefetch_index < allocated_blocks.next_slot) {
MSBlockInfo *prefetch_block = BLOCK_UNTAG (*sgen_array_list_get_slot (&allocated_blocks, prefetch_index));
PREFETCH_READ (prefetch_block);
collector->needs_thread_pool = concurrent_mark || concurrent_sweep;
}
+/* We are guaranteed to be called by the worker in question */
+static void
+sgen_worker_init_callback (gpointer worker_untyped)
+{
+ int i;
+ WorkerData *worker = (WorkerData*) worker_untyped;
+ MSBlockInfo ***worker_free_blocks = (MSBlockInfo ***) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo**) * MS_BLOCK_TYPE_MAX, INTERNAL_MEM_MS_TABLES, TRUE);
+
+ for (i = 0; i < MS_BLOCK_TYPE_MAX; i++)
+ worker_free_blocks [i] = (MSBlockInfo **) sgen_alloc_internal_dynamic (sizeof (MSBlockInfo*) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE);
+
+ worker->free_block_lists = worker_free_blocks;
+
+ mono_native_tls_set_value (worker_block_free_list_key, worker_free_blocks);
+}
+
static void
-sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurrent)
+sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurrent, gboolean is_parallel)
{
int i;
concurrent_mark = is_concurrent;
collector->is_concurrent = is_concurrent;
+ collector->is_parallel = is_parallel;
collector->needs_thread_pool = is_concurrent || concurrent_sweep;
collector->get_and_reset_num_major_objects_marked = major_get_and_reset_num_major_objects_marked;
collector->supports_cardtable = TRUE;
collector->alloc_degraded = major_alloc_degraded;
collector->alloc_object = major_alloc_object;
+ collector->alloc_object_par = major_alloc_object_par;
collector->free_pinned_object = free_pinned_object;
collector->iterate_objects = major_iterate_objects;
collector->free_non_pinned_object = major_free_non_pinned_object;
collector->major_ops_concurrent_finish.scan_vtype = major_scan_vtype_with_evacuation;
collector->major_ops_concurrent_finish.scan_ptr_field = major_scan_ptr_field_with_evacuation;
collector->major_ops_concurrent_finish.drain_gray_stack = drain_gray_stack;
+
+ if (is_parallel) {
+ collector->major_ops_conc_par_start.copy_or_mark_object = major_copy_or_mark_object_concurrent_par_canonical;
+ collector->major_ops_conc_par_start.scan_object = major_scan_object_concurrent_par_with_evacuation;
+ collector->major_ops_conc_par_start.scan_vtype = major_scan_vtype_concurrent_par_with_evacuation;
+ collector->major_ops_conc_par_start.scan_ptr_field = major_scan_ptr_field_concurrent_par_with_evacuation;
+ collector->major_ops_conc_par_start.drain_gray_stack = drain_gray_stack_concurrent_par;
+
+ collector->major_ops_conc_par_finish.copy_or_mark_object = major_copy_or_mark_object_concurrent_par_finish_canonical;
+ collector->major_ops_conc_par_finish.scan_object = major_scan_object_par_with_evacuation;
+ collector->major_ops_conc_par_finish.scan_vtype = major_scan_vtype_par_with_evacuation;
+ collector->major_ops_conc_par_finish.scan_ptr_field = major_scan_ptr_field_par_with_evacuation;
+ collector->major_ops_conc_par_finish.drain_gray_stack = drain_gray_stack_par;
+
+ collector->worker_init_cb = sgen_worker_init_callback;
+
+ mono_native_tls_alloc (&worker_block_free_list_key, NULL);
+ }
}
#ifdef HEAVY_STATISTICS
void
sgen_marksweep_init (SgenMajorCollector *collector)
{
- sgen_marksweep_init_internal (collector, FALSE);
+ sgen_marksweep_init_internal (collector, FALSE, FALSE);
}
void
sgen_marksweep_conc_init (SgenMajorCollector *collector)
{
- sgen_marksweep_init_internal (collector, TRUE);
+ sgen_marksweep_init_internal (collector, TRUE, FALSE);
+}
+
+void
+sgen_marksweep_conc_par_init (SgenMajorCollector *collector)
+{
+ sgen_marksweep_init_internal (collector, TRUE, TRUE);
}
#endif
#undef SERIAL_SCAN_OBJECT
#undef SERIAL_SCAN_VTYPE
#undef SERIAL_SCAN_PTR_FIELD
+#undef SERIAL_DRAIN_GRAY_STACK
#if defined(SGEN_SIMPLE_NURSERY)
#define SERIAL_SCAN_OBJECT simple_nursery_serial_with_concurrent_major_scan_object
#define SERIAL_SCAN_VTYPE simple_nursery_serial_with_concurrent_major_scan_vtype
#define SERIAL_SCAN_PTR_FIELD simple_nursery_serial_with_concurrent_major_scan_ptr_field
+#define SERIAL_DRAIN_GRAY_STACK simple_nursery_serial_with_concurrent_major_drain_gray_stack
#else
#define SERIAL_SCAN_OBJECT simple_nursery_serial_scan_object
#define SERIAL_SCAN_VTYPE simple_nursery_serial_scan_vtype
#define SERIAL_SCAN_PTR_FIELD simple_nursery_serial_scan_ptr_field
+#define SERIAL_DRAIN_GRAY_STACK simple_nursery_serial_drain_gray_stack
#endif
#elif defined (SGEN_SPLIT_NURSERY)
#define SERIAL_SCAN_OBJECT split_nursery_serial_with_concurrent_major_scan_object
#define SERIAL_SCAN_VTYPE split_nursery_serial_with_concurrent_major_scan_vtype
#define SERIAL_SCAN_PTR_FIELD split_nursery_serial_with_concurrent_major_scan_ptr_field
+#define SERIAL_DRAIN_GRAY_STACK split_nursery_serial_with_concurrent_major_drain_gray_stack
#else
#define SERIAL_SCAN_OBJECT split_nursery_serial_scan_object
#define SERIAL_SCAN_VTYPE split_nursery_serial_scan_vtype
#define SERIAL_SCAN_PTR_FIELD split_nursery_serial_scan_ptr_field
+#define SERIAL_DRAIN_GRAY_STACK split_nursery_serial_drain_gray_stack
#endif
#else
HANDLE_PTR (ptr, NULL);
}
+static gboolean
+SERIAL_DRAIN_GRAY_STACK (SgenGrayQueue *queue)
+{
+ for (;;) {
+ GCObject *obj;
+ SgenDescriptor desc;
+
+ GRAY_OBJECT_DEQUEUE_SERIAL (queue, &obj, &desc);
+ if (!obj)
+ return TRUE;
+
+ SERIAL_SCAN_OBJECT (obj, desc, queue);
+ }
+}
+
#define FILL_MINOR_COLLECTOR_SCAN_OBJECT(ops) do { \
(ops)->scan_object = SERIAL_SCAN_OBJECT; \
(ops)->scan_vtype = SERIAL_SCAN_VTYPE; \
(ops)->scan_ptr_field = SERIAL_SCAN_PTR_FIELD; \
+ (ops)->drain_gray_stack = SERIAL_DRAIN_GRAY_STACK; \
} while (0)
if (addr0 < addr1) {
if (unpin_queue)
- GRAY_OBJECT_ENQUEUE (unpin_queue, (GCObject*)addr0, sgen_obj_get_descriptor_safe ((GCObject*)addr0));
+ GRAY_OBJECT_ENQUEUE_SERIAL (unpin_queue, (GCObject*)addr0, sgen_obj_get_descriptor_safe ((GCObject*)addr0));
else
SGEN_UNPIN_OBJECT (addr0);
size = SGEN_ALIGN_UP (sgen_safe_object_get_size ((GCObject*)addr0));
SGEN_ASSERT (5, sgen_ptr_in_nursery (obj), "Can only cement pointers to nursery objects");
if (!hash [i].obj) {
- SGEN_ASSERT (5, !hash [i].count, "Cementing hash inconsistent");
- hash [i].obj = obj;
+ GCObject *old_obj;
+ old_obj = InterlockedCompareExchangePointer ((gpointer*)&hash [i].obj, obj, NULL);
+ /* Check if the slot was occupied by some other object */
+ if (old_obj != NULL && old_obj != obj)
+ return FALSE;
} else if (hash [i].obj != obj) {
return FALSE;
}
if (hash [i].count >= SGEN_CEMENT_THRESHOLD)
return TRUE;
- ++hash [i].count;
- if (hash [i].count == SGEN_CEMENT_THRESHOLD) {
+ if (InterlockedIncrement ((gint32*)&hash [i].count) == SGEN_CEMENT_THRESHOLD) {
SGEN_ASSERT (9, sgen_get_current_collection_generation () >= 0, "We can only cement objects when we're in a collection pause.");
SGEN_ASSERT (9, SGEN_OBJECT_IS_PINNED (obj), "Can only cement pinned objects");
SGEN_CEMENT_OBJECT (obj);
protocol_entry (unsigned char type, gpointer data, int size)
{
int index;
+ gboolean include_worker_index = type != PROTOCOL_ID (binary_protocol_header);
+ int entry_size = size + 1 + (include_worker_index ? 1 : 0); // type + worker_index + size
BinaryProtocolBuffer *buffer;
if (binary_protocol_file == invalid_file_value)
return;
- if (sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()))
- type |= 0x80;
-
lock_recursive ();
retry:
buffer = binary_protocol_get_buffer (size + 1);
retry_same_buffer:
index = buffer->index;
- if (index + 1 + size > BINARY_PROTOCOL_BUFFER_SIZE)
+ if (index + entry_size > BINARY_PROTOCOL_BUFFER_SIZE)
goto retry;
- if (InterlockedCompareExchange (&buffer->index, index + 1 + size, index) != index)
+ if (InterlockedCompareExchange (&buffer->index, index + entry_size, index) != index)
goto retry_same_buffer;
/* FIXME: if we're interrupted at this point, we have a buffer
entry that contains random data. */
buffer->buffer [index++] = type;
+ /* We should never change the header format */
+ if (include_worker_index) {
+ /*
+ * If the thread is not a worker thread we insert 0, which is interpreted
+ * as gc thread. Worker indexes are 1 based.
+ */
+ buffer->buffer [index++] = (unsigned char) sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ());
+ }
memcpy (buffer->buffer + index, data, size);
index += size;
#include "sgen-gc.h"
#define PROTOCOL_HEADER_CHECK 0xde7ec7ab1ec0de
-#define PROTOCOL_HEADER_VERSION 1
+/*
+ * The version needs to be bumped every time we introduce breaking changes (like
+ * adding new protocol entries or various format changes). The latest protocol grepper
+ * should be able to handle all the previous versions, while an old grepper will
+ * be able to tell if it cannot handle the format.
+ */
+#define PROTOCOL_HEADER_VERSION 2
/* Special indices returned by MATCH_INDEX. */
#define BINARY_PROTOCOL_NO_MATCH (-1)
#include "mono/utils/mono-threads.h"
#endif
+#define MAX_NUM_THREADS 8
+
static mono_mutex_t lock;
static mono_cond_t work_cond;
static mono_cond_t done_cond;
-static MonoNativeThreadId thread;
+static int threads_num = 0;
+static MonoNativeThreadId threads [MAX_NUM_THREADS];
/* Only accessed with the lock held. */
static SgenPointerQueue job_queue;
static SgenThreadPoolThreadInitFunc thread_init_func;
static SgenThreadPoolIdleJobFunc idle_job_func;
static SgenThreadPoolContinueIdleJobFunc continue_idle_job_func;
+static SgenThreadPoolShouldWorkFunc should_work_func;
static volatile gboolean threadpool_shutdown;
-static volatile gboolean thread_finished;
+static volatile int threads_finished = 0;
enum {
STATE_WAITING,
}
+/*
+ * Ask the registered continue-idle callback whether this thread still has
+ * idle work to do. FALSE when no callback is registered.
+ */
static gboolean
-continue_idle_job (void)
+continue_idle_job (void *thread_data)
{
 if (!continue_idle_job_func)
 return FALSE;
- return continue_idle_job_func ();
+ return continue_idle_job_func (thread_data);
+}
+
+/*
+ * Whether this pool thread should participate at all. Defaults to TRUE when
+ * no should-work callback is registered.
+ */
+static gboolean
+should_work (void *thread_data)
+{
+ if (!should_work_func)
+ return TRUE;
+ return should_work_func (thread_data);
+}
static mono_native_thread_return_t
mono_os_mutex_lock (&lock);
for (;;) {
+ gboolean do_idle;
+ SgenThreadPoolJob *job;
+
+ if (!should_work (thread_data)) {
+ mono_os_cond_wait (&work_cond, &lock);
+ continue;
+ }
/*
* It's important that we check the continue idle flag with the lock held.
* Suppose we didn't check with the lock held, and the result is FALSE. The
* main thread might then set continue idle and signal us before we can take
* the lock, and we'd lose the signal.
*/
- gboolean do_idle = continue_idle_job ();
- SgenThreadPoolJob *job = get_job_and_set_in_progress ();
+ do_idle = continue_idle_job (thread_data);
+ job = get_job_and_set_in_progress ();
if (!job && !do_idle && !threadpool_shutdown) {
/*
SGEN_ASSERT (0, idle_job_func, "Why do we have idle work when there's no idle job function?");
do {
idle_job_func (thread_data);
- do_idle = continue_idle_job ();
+ do_idle = continue_idle_job (thread_data);
} while (do_idle && !job_queue.next_slot);
mono_os_mutex_lock (&lock);
} else {
SGEN_ASSERT (0, threadpool_shutdown, "Why did we unlock if no jobs and not shutting down?");
mono_os_mutex_lock (&lock);
- thread_finished = TRUE;
+ threads_finished++;
mono_os_cond_signal (&done_cond);
mono_os_mutex_unlock (&lock);
return 0;
}
+/*
+ * Start the thread pool with up to MAX_NUM_THREADS threads.
+ * NOTE(review): a larger num_threads is silently clamped to MAX_NUM_THREADS,
+ * so callers requesting more get fewer threads than asked for — confirm this
+ * is intended rather than an assertion-worthy condition.
+ */
void
-sgen_thread_pool_init (int num_threads, SgenThreadPoolThreadInitFunc init_func, SgenThreadPoolIdleJobFunc idle_func, SgenThreadPoolContinueIdleJobFunc continue_idle_func, void **thread_datas)
+sgen_thread_pool_init (int num_threads, SgenThreadPoolThreadInitFunc init_func, SgenThreadPoolIdleJobFunc idle_func, SgenThreadPoolContinueIdleJobFunc continue_idle_func, SgenThreadPoolShouldWorkFunc should_work_func_p, void **thread_datas)
{
- SGEN_ASSERT (0, num_threads == 1, "We only support 1 thread pool thread for now.");
+ int i;
+
+ threads_num = (num_threads < MAX_NUM_THREADS) ? num_threads : MAX_NUM_THREADS;
 mono_os_mutex_init (&lock);
 mono_os_cond_init (&work_cond);
 thread_init_func = init_func;
 idle_job_func = idle_func;
 continue_idle_job_func = continue_idle_func;
+ should_work_func = should_work_func_p;
- mono_native_thread_create (&thread, thread_func, thread_datas ? thread_datas [0] : NULL);
+ /* thread_datas, when provided, must have at least threads_num entries. */
+ for (i = 0; i < threads_num; i++)
+ mono_native_thread_create (&threads [i], thread_func, thread_datas ? thread_datas [i] : NULL);
}
void
sgen_thread_pool_shutdown (void)
{
- if (!thread)
+ if (!threads_num)
return;
mono_os_mutex_lock (&lock);
threadpool_shutdown = TRUE;
- mono_os_cond_signal (&work_cond);
- while (!thread_finished)
+ mono_os_cond_broadcast (&work_cond);
+ while (threads_finished < threads_num)
mono_os_cond_wait (&done_cond, &lock);
mono_os_mutex_unlock (&lock);
mono_os_mutex_lock (&lock);
sgen_pointer_queue_add (&job_queue, job);
- /*
- * FIXME: We could check whether there is a job in progress. If there is, there's
- * no need to signal the condition, at least as long as we have only one thread.
- */
mono_os_cond_signal (&work_cond);
mono_os_mutex_unlock (&lock);
mono_os_mutex_lock (&lock);
- if (continue_idle_job_func ())
- mono_os_cond_signal (&work_cond);
+ if (continue_idle_job_func (NULL))
+ mono_os_cond_broadcast (&work_cond);
mono_os_mutex_unlock (&lock);
}
mono_os_mutex_lock (&lock);
- while (continue_idle_job_func ())
+ while (continue_idle_job_func (NULL))
mono_os_cond_wait (&done_cond, &lock);
mono_os_mutex_unlock (&lock);
mono_os_mutex_unlock (&lock);
}
-gboolean
+/* Returns 0 if some_thread is not a pool thread, otherwise its 1-based index in the pool. */
+int
sgen_thread_pool_is_thread_pool_thread (MonoNativeThreadId some_thread)
{
- return some_thread == thread;
+ int i;
+
+ for (i = 0; i < threads_num; i++) {
+ if (some_thread == threads [i])
+ return i + 1;
+ }
+
+ return 0;
}
#endif
typedef void (*SgenThreadPoolThreadInitFunc) (void*);
typedef void (*SgenThreadPoolIdleJobFunc) (void*);
-typedef gboolean (*SgenThreadPoolContinueIdleJobFunc) (void);
+typedef gboolean (*SgenThreadPoolContinueIdleJobFunc) (void*);
+typedef gboolean (*SgenThreadPoolShouldWorkFunc) (void*);
-void sgen_thread_pool_init (int num_threads, SgenThreadPoolThreadInitFunc init_func, SgenThreadPoolIdleJobFunc idle_func, SgenThreadPoolContinueIdleJobFunc continue_idle_func, void **thread_datas);
+void sgen_thread_pool_init (int num_threads, SgenThreadPoolThreadInitFunc init_func, SgenThreadPoolIdleJobFunc idle_func, SgenThreadPoolContinueIdleJobFunc continue_idle_func, SgenThreadPoolShouldWorkFunc should_work_func, void **thread_datas);
void sgen_thread_pool_shutdown (void);
void sgen_thread_pool_wait_for_all_jobs (void);
-gboolean sgen_thread_pool_is_thread_pool_thread (MonoNativeThreadId thread);
+int sgen_thread_pool_is_thread_pool_thread (MonoNativeThreadId thread);
#endif
#include "mono/sgen/sgen-client.h"
static int workers_num;
+static int active_workers_num;
static volatile gboolean forced_stop;
static WorkerData *workers_data;
+static SgenWorkerCallback worker_init_cb;
+
+/*
+ * When using multiple workers, we need to have the last worker
+ * enqueue the preclean jobs (if there are any). This lock ensures
+ * that when the last worker takes it, all the other workers have
+ * gracefully finished, so it can restart them.
+ */
+static mono_mutex_t finished_lock;
+static volatile gboolean workers_finished;
+static int worker_awakenings;
static SgenSectionGrayQueue workers_distribute_gray_queue;
static gboolean workers_distribute_gray_queue_inited;
*
* | from \ to | NOT WORKING | WORKING | WORK ENQUEUED |
* |--------------------+-------------+---------+---------------+
- * | NOT WORKING | - | - | main |
- * | WORKING | worker | - | main |
+ * | NOT WORKING | - | - | main / worker |
+ * | WORKING | worker | - | main / worker |
* | WORK ENQUEUED | - | worker | - |
*
* The WORK ENQUEUED state guarantees that the worker thread will inspect the queue again at
typedef gint32 State;
-static volatile State workers_state;
-
static SgenObjectOperations * volatile idle_func_object_ops;
-static SgenThreadPoolJob * volatile preclean_job;
+static SgenObjectOperations *idle_func_object_ops_par, *idle_func_object_ops_nopar;
+/*
+ * finished_callback is called only when the workers finish work normally (when they
+ * are not forced to finish). The callback is used to enqueue preclean jobs.
+ */
+static volatile SgenWorkersFinishCallback finish_callback;
static guint64 stat_workers_num_finished;
static gboolean
-set_state (State old_state, State new_state)
+set_state (WorkerData *data, State old_state, State new_state)
{
SGEN_ASSERT (0, old_state != new_state, "Why are we transitioning to the same state?");
if (new_state == STATE_NOT_WORKING)
if (new_state == STATE_NOT_WORKING || new_state == STATE_WORKING)
SGEN_ASSERT (6, sgen_thread_pool_is_thread_pool_thread (mono_native_thread_id_get ()), "Only the worker thread is allowed to transition to NOT_WORKING or WORKING");
- return InterlockedCompareExchange (&workers_state, new_state, old_state) == old_state;
+ return InterlockedCompareExchange (&data->state, new_state, old_state) == old_state;
}
static gboolean
+/*
+ * Wake every active worker by forcing its state to WORK ENQUEUED, after
+ * selecting the idle-function context matching the active worker count.
+ */
static void
sgen_workers_ensure_awake (void)
{
- State old_state;
- gboolean did_set_state;
+ int i;
+ gboolean need_signal = FALSE;
- do {
- old_state = workers_state;
+ /*
+ * All workers are awakened, make sure we reset the parallel context.
+ * We call this function only when starting the workers so nobody is running,
+ * or when the last worker is enqueuing preclean work. In both cases we can't
+ * have a worker working using a nopar context, which means it is safe.
+ */
+ idle_func_object_ops = (active_workers_num > 1) ? idle_func_object_ops_par : idle_func_object_ops_nopar;
+ workers_finished = FALSE;
- if (old_state == STATE_WORK_ENQUEUED)
- break;
+ for (i = 0; i < active_workers_num; i++) {
+ State old_state;
+ gboolean did_set_state;
+
+ do {
+ old_state = workers_data [i].state;
+
+ if (old_state == STATE_WORK_ENQUEUED)
+ break;
- did_set_state = set_state (old_state, STATE_WORK_ENQUEUED);
- } while (!did_set_state);
+ did_set_state = set_state (&workers_data [i], old_state, STATE_WORK_ENQUEUED);
+ } while (!did_set_state);
- if (!state_is_working_or_enqueued (old_state))
+ if (!state_is_working_or_enqueued (old_state))
+ need_signal = TRUE;
+ }
+
+ /* Only poke the pool if at least one worker was not already working/enqueued. */
+ if (need_signal)
 sgen_thread_pool_idle_signal ();
}
worker_try_finish (WorkerData *data)
{
State old_state;
+ int i, working = 0;
++stat_workers_num_finished;
+ mono_os_mutex_lock (&finished_lock);
+
+ for (i = 0; i < active_workers_num; i++) {
+ if (state_is_working_or_enqueued (workers_data [i].state))
+ working++;
+ }
+
+ if (working == 1) {
+ SgenWorkersFinishCallback callback = finish_callback;
+ SGEN_ASSERT (0, idle_func_object_ops == idle_func_object_ops_nopar, "Why are we finishing with parallel context");
+ /* We are the last one left. Enqueue preclean job if we have one and awake everybody */
+ SGEN_ASSERT (0, data->state != STATE_NOT_WORKING, "How did we get from doing idle work to NOT WORKING without setting it ourselves?");
+ if (callback) {
+ finish_callback = NULL;
+ callback ();
+ worker_awakenings = 0;
+ /* Make sure each worker has a chance of seeing the enqueued jobs */
+ sgen_workers_ensure_awake ();
+ SGEN_ASSERT (0, data->state == STATE_WORK_ENQUEUED, "Why did we fail to set our own state to ENQUEUED");
+ goto work_available;
+ }
+ }
+
do {
- old_state = workers_state;
+ old_state = data->state;
SGEN_ASSERT (0, old_state != STATE_NOT_WORKING, "How did we get from doing idle work to NOT WORKING without setting it ourselves?");
if (old_state == STATE_WORK_ENQUEUED)
- return;
+ goto work_available;
SGEN_ASSERT (0, old_state == STATE_WORKING, "What other possibility is there?");
+ } while (!set_state (data, old_state, STATE_NOT_WORKING));
- /* We are the last thread to go to sleep. */
- } while (!set_state (old_state, STATE_NOT_WORKING));
+ /*
+ * If we are second to last to finish, we set the scan context to the non-parallel
+ * version so we can speed up the last worker. This helps us maintain same level
+ * of performance as non-parallel mode even if we fail to distribute work properly.
+ */
+ if (working == 2)
+ idle_func_object_ops = idle_func_object_ops_nopar;
+
+ workers_finished = TRUE;
+ mono_os_mutex_unlock (&finished_lock);
binary_protocol_worker_finish (sgen_timestamp (), forced_stop);
sgen_gray_object_queue_trim_free_list (&data->private_gray_queue);
+ return;
+
+work_available:
+ mono_os_mutex_unlock (&finished_lock);
}
void
sgen_thread_pool_job_enqueue (job);
}
-void
-sgen_workers_wait_for_jobs_finished (void)
-{
- sgen_thread_pool_wait_for_all_jobs ();
- /*
- * If the idle task was never triggered or it finished before the last job did and
- * then didn't get triggered again, we might end up in the situation of having
- * something in the gray queue yet the idle task not working. The easiest way to
- * make sure this doesn't stay that way is to just trigger it again after all jobs
- * have finished.
- */
- sgen_workers_ensure_awake ();
-}
-
static gboolean
workers_get_work (WorkerData *data)
{
if (major->is_concurrent) {
GrayQueueSection *section = sgen_section_gray_queue_dequeue (&workers_distribute_gray_queue);
if (section) {
- sgen_gray_object_enqueue_section (&data->private_gray_queue, section);
+ sgen_gray_object_enqueue_section (&data->private_gray_queue, section, major->is_parallel);
return TRUE;
}
}
return FALSE;
}
+/*
+ * Try to refill our (empty) private gray queue by stealing a gray-queue
+ * section from another active worker, scanning round-robin starting after
+ * ourselves. Parallel collector only; returns TRUE if a section was obtained.
+ */
+static gboolean
+workers_steal_work (WorkerData *data)
+{
+ SgenMajorCollector *major = sgen_get_major_collector ();
+ GrayQueueSection *section = NULL;
+ int i, current_worker;
+
+ if (!major->is_parallel)
+ return FALSE;
+
+ /* If we're parallel, steal from other workers' private gray queues */
+ g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+
+ current_worker = (int) (data - workers_data);
+
+ for (i = 1; i < active_workers_num && !section; i++) {
+ int steal_worker = (current_worker + i) % active_workers_num;
+ if (state_is_working_or_enqueued (workers_data [steal_worker].state))
+ section = sgen_gray_object_steal_section (&workers_data [steal_worker].private_gray_queue);
+ }
+
+ if (section) {
+ sgen_gray_object_enqueue_section (&data->private_gray_queue, section, TRUE);
+ return TRUE;
+ }
+
+ /* Nobody to steal from */
+ g_assert (sgen_gray_object_queue_is_empty (&data->private_gray_queue));
+ return FALSE;
+}
+
static void
concurrent_enqueue_check (GCObject *obj)
{
return;
init_private_gray_queue (data);
+
+ if (worker_init_cb)
+ worker_init_cb (data);
+}
+
+/*
+ * Continue-idle predicate for the thread pool. With per-worker data we
+ * continue only while that worker's state is WORKING / WORK ENQUEUED; called
+ * with NULL (from the pool's generic paths) it answers for all workers.
+ */
+static gboolean
+continue_idle_func (void *data_untyped)
+{
+ if (data_untyped) {
+ WorkerData *data = (WorkerData *)data_untyped;
+ return state_is_working_or_enqueued (data->state);
+ } else {
+ /* Return TRUE if any of the workers is still working */
+ return !sgen_workers_all_done ();
+ }
}
static gboolean
-continue_idle_func (void)
+should_work_func (void *data_untyped)
{
- return state_is_working_or_enqueued (workers_state);
+ WorkerData *data = (WorkerData*)data_untyped;
+ int current_worker = (int) (data - workers_data);
+
+ /* Only the first active_workers_num pool threads take part in this cycle. */
+ return current_worker < active_workers_num;
}
static void
{
WorkerData *data = (WorkerData *)data_untyped;
- SGEN_ASSERT (0, continue_idle_func (), "Why are we called when we're not supposed to work?");
+ SGEN_ASSERT (0, continue_idle_func (data_untyped), "Why are we called when we're not supposed to work?");
SGEN_ASSERT (0, sgen_concurrent_collection_in_progress (), "The worker should only mark in concurrent collections.");
- if (workers_state == STATE_WORK_ENQUEUED) {
- set_state (STATE_WORK_ENQUEUED, STATE_WORKING);
- SGEN_ASSERT (0, workers_state != STATE_NOT_WORKING, "How did we get from WORK ENQUEUED to NOT WORKING?");
+ if (data->state == STATE_WORK_ENQUEUED) {
+ set_state (data, STATE_WORK_ENQUEUED, STATE_WORKING);
+ SGEN_ASSERT (0, data->state != STATE_NOT_WORKING, "How did we get from WORK ENQUEUED to NOT WORKING?");
}
- if (!forced_stop && (!sgen_gray_object_queue_is_empty (&data->private_gray_queue) || workers_get_work (data))) {
+ if (!forced_stop && (!sgen_gray_object_queue_is_empty (&data->private_gray_queue) || workers_get_work (data) || workers_steal_work (data))) {
ScanCopyContext ctx = CONTEXT_FROM_OBJECT_OPERATIONS (idle_func_object_ops, &data->private_gray_queue);
SGEN_ASSERT (0, !sgen_gray_object_queue_is_empty (&data->private_gray_queue), "How is our gray queue empty if we just got work?");
sgen_drain_gray_stack (ctx);
- } else {
- SgenThreadPoolJob *job = preclean_job;
- if (job) {
- sgen_thread_pool_job_enqueue (job);
- preclean_job = NULL;
- } else {
- worker_try_finish (data);
+
+ if (data->private_gray_queue.num_sections > 16 && workers_finished && worker_awakenings < active_workers_num) {
+ /* We bound the number of worker awakenings just to be sure */
+ worker_awakenings++;
+ sgen_workers_ensure_awake ();
}
+ } else {
+ worker_try_finish (data);
}
}
}
+/*
+ * Initialize the worker subsystem: allocate per-worker data and start the
+ * shared thread pool. `callback`, if non-NULL, is stored in worker_init_cb
+ * and runs in each worker thread at init time. Non-concurrent collectors
+ * only get a bare pool with no idle/marking machinery.
+ */
void
-sgen_workers_init (int num_workers)
+sgen_workers_init (int num_workers, SgenWorkerCallback callback)
{
 int i;
 void **workers_data_ptrs = (void **)alloca(num_workers * sizeof(void *));
 if (!sgen_get_major_collector ()->is_concurrent) {
- sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL, NULL);
+ sgen_thread_pool_init (num_workers, thread_pool_init_func, NULL, NULL, NULL, NULL);
 return;
 }
+ mono_os_mutex_init (&finished_lock);
 //g_print ("initing %d workers\n", num_workers);
 workers_num = num_workers;
+ active_workers_num = num_workers;
 workers_data = (WorkerData *)sgen_alloc_internal_dynamic (sizeof (WorkerData) * num_workers, INTERNAL_MEM_WORKER_DATA, TRUE);
 memset (workers_data, 0, sizeof (WorkerData) * num_workers);
 init_distribute_gray_queue ();
- for (i = 0; i < workers_num; ++i)
+ for (i = 0; i < num_workers; ++i)
 workers_data_ptrs [i] = (void *) &workers_data [i];
- sgen_thread_pool_init (num_workers, thread_pool_init_func, marker_idle_func, continue_idle_func, workers_data_ptrs);
+ worker_init_cb = callback;
+
+ sgen_thread_pool_init (num_workers, thread_pool_init_func, marker_idle_func, continue_idle_func, should_work_func, workers_data_ptrs);
 mono_counters_register ("# workers finished", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_workers_num_finished);
}
void
sgen_workers_stop_all_workers (void)
{
- preclean_job = NULL;
+ finish_callback = NULL;
mono_memory_write_barrier ();
forced_stop = TRUE;
sgen_thread_pool_wait_for_all_jobs ();
sgen_thread_pool_idle_wait ();
- SGEN_ASSERT (0, workers_state == STATE_NOT_WORKING, "Can only signal enqueue work when in no work state");
+ SGEN_ASSERT (0, sgen_workers_all_done (), "Can only signal enqueue work when in no work state");
+}
+
+/*
+ * Set how many of the initialized workers take part in the next collection.
+ * Passing 0 restores the full worker count. The requested count must not
+ * exceed the number of workers created in sgen_workers_init ().
+ */
+void
+sgen_workers_set_num_active_workers (int num_workers)
+{
+ if (num_workers) {
+ /* Validate the requested count, not the previous active count. */
+ SGEN_ASSERT (0, num_workers <= workers_num, "We can't start more workers than we initialized");
+ active_workers_num = num_workers;
+ } else {
+ active_workers_num = workers_num;
+ }
+}
void
-sgen_workers_start_all_workers (SgenObjectOperations *object_ops, SgenThreadPoolJob *job)
+sgen_workers_start_all_workers (SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par, SgenWorkersFinishCallback callback)
{
+ idle_func_object_ops_par = object_ops_par;
+ idle_func_object_ops_nopar = object_ops_nopar;
forced_stop = FALSE;
- idle_func_object_ops = object_ops;
- preclean_job = job;
+ finish_callback = callback;
+ worker_awakenings = 0;
mono_memory_write_barrier ();
sgen_workers_ensure_awake ();
sgen_thread_pool_wait_for_all_jobs ();
sgen_thread_pool_idle_wait ();
- SGEN_ASSERT (0, workers_state == STATE_NOT_WORKING, "Can only signal enqueue work when in no work state");
+ SGEN_ASSERT (0, sgen_workers_all_done (), "Can only signal enqueue work when in no work state");
/* At this point all the workers have stopped. */
SGEN_ASSERT (0, sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue), "Why is there still work left to do?");
- for (i = 0; i < workers_num; ++i)
+ for (i = 0; i < active_workers_num; ++i)
SGEN_ASSERT (0, sgen_gray_object_queue_is_empty (&workers_data [i].private_gray_queue), "Why is there still work left to do?");
}
if (!sgen_section_gray_queue_is_empty (&workers_distribute_gray_queue))
return TRUE;
- for (i = 0; i < workers_num; ++i) {
+ for (i = 0; i < active_workers_num; ++i) {
if (!sgen_gray_object_queue_is_empty (&workers_data [i].private_gray_queue))
return TRUE;
}
gboolean
sgen_workers_all_done (void)
{
- return workers_state == STATE_NOT_WORKING;
+ int i;
+
+ /* TRUE only when no active worker is WORKING or has work enqueued. */
+ for (i = 0; i < active_workers_num; i++) {
+ if (state_is_working_or_enqueued (workers_data [i].state))
+ return FALSE;
+ }
+ return TRUE;
}
/* Must only be used for debugging */
gboolean
sgen_workers_are_working (void)
{
- return state_is_working_or_enqueued (workers_state);
+ /* Debug-only: delegates to the per-worker state scan. */
+ return !sgen_workers_all_done ();
}
void
}
+/*
+ * Move all sections from `queue` into the workers' distribute gray queue,
+ * spreading the contents first so work splits evenly across jobs. The
+ * workers must not be running yet (see the assert); the caller starts them
+ * afterwards.
+ */
void
-sgen_workers_take_from_queue_and_awake (SgenGrayQueue *queue)
+sgen_workers_take_from_queue (SgenGrayQueue *queue)
{
- gboolean wake = FALSE;
+ sgen_gray_object_spread (queue, sgen_workers_get_job_split_count ());
 for (;;) {
 GrayQueueSection *section = sgen_gray_object_dequeue_section (queue);
 if (!section)
 break;
 sgen_section_gray_queue_enqueue (&workers_distribute_gray_queue, section);
- wake = TRUE;
 }
- if (wake) {
- SGEN_ASSERT (0, sgen_concurrent_collection_in_progress (), "Why is there work to take when there's no concurrent collection in progress?");
- sgen_workers_ensure_awake ();
- }
+ SGEN_ASSERT (0, !sgen_workers_are_working (), "We should fully populate the distribute gray queue before we start the workers");
+}
+
+/*
+ * Object operations used by the workers' idle (marking) function: the
+ * parallel ops when they were provided, otherwise the non-parallel ones.
+ */
+SgenObjectOperations*
+sgen_workers_get_idle_func_object_ops (void)
+{
+ return (idle_func_object_ops_par) ? idle_func_object_ops_par : idle_func_object_ops_nopar;
+}
+
+/*
+ * If we have a single worker, splitting into multiple jobs makes no sense. With
+ * more than one worker, we split into a larger number of jobs so that, in case
+ * the work load is uneven, a worker that finished quickly can take up more jobs
+ * than another one. The factor of 4 is a heuristic over-partitioning ratio.
+ */
+int
+sgen_workers_get_job_split_count (void)
+{
+ return (active_workers_num > 1) ? active_workers_num * 4 : 1;
+}
+
+/* Invoke `callback` on every worker's data — all initialized workers, not just the active ones. */
+void
+sgen_workers_foreach (SgenWorkerCallback callback)
+{
+ int i;
+
+ for (i = 0; i < workers_num; i++)
+ callback (&workers_data [i]);
+}
#endif
typedef struct _WorkerData WorkerData;
struct _WorkerData {
+ gint32 state;
SgenGrayQueue private_gray_queue; /* only read/written by worker thread */
+ /*
+ * Workers allocate major objects only from here. It has same structure as the
+ * global one. This is normally accessed from the worker_block_free_list_key.
+ * We hold it here so we can clear free lists from all threads before sweep
+ * starts.
+ */
+ gpointer free_block_lists;
};
-void sgen_workers_init (int num_workers);
+typedef void (*SgenWorkersFinishCallback) (void);
+typedef void (*SgenWorkerCallback) (WorkerData *data);
+
+void sgen_workers_init (int num_workers, SgenWorkerCallback callback);
void sgen_workers_stop_all_workers (void);
-void sgen_workers_start_all_workers (SgenObjectOperations *object_ops, SgenThreadPoolJob *finish_job);
+void sgen_workers_set_num_active_workers (int num_workers);
+void sgen_workers_start_all_workers (SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par, SgenWorkersFinishCallback finish_job);
void sgen_workers_init_distribute_gray_queue (void);
void sgen_workers_enqueue_job (SgenThreadPoolJob *job, gboolean enqueue);
-void sgen_workers_wait_for_jobs_finished (void);
void sgen_workers_distribute_gray_queue_sections (void);
void sgen_workers_reset_data (void);
void sgen_workers_join (void);
gboolean sgen_workers_all_done (void);
gboolean sgen_workers_are_working (void);
void sgen_workers_assert_gray_queue_is_empty (void);
-void sgen_workers_take_from_queue_and_awake (SgenGrayQueue *queue);
+void sgen_workers_take_from_queue (SgenGrayQueue *queue);
+SgenObjectOperations* sgen_workers_get_idle_func_object_ops (void);
+int sgen_workers_get_job_split_count (void);
+void sgen_workers_foreach (SgenWorkerCallback callback);
#endif
sgen-regular-tests: $(SGEN_REGULAR_TESTS)
$(MAKE) sgen-regular-tests-ms
$(MAKE) sgen-regular-tests-ms-conc
+ $(MAKE) sgen-regular-tests-ms-conc-par
$(MAKE) sgen-regular-tests-ms-conc-split
$(MAKE) sgen-regular-tests-ms-split
$(MAKE) sgen-regular-tests-ms-conc-split-95
MONO_ENV_OPTIONS="--gc=sgen" MONO_GC_DEBUG="" MONO_GC_PARAMS="major=marksweep" $(RUNTIME) $(TEST_RUNNER) $(TEST_RUNNER_ARGS) --testsuite-name $@ --disabled "$(DISABLED_TESTS)" --timeout 900 $(SGEN_REGULAR_TESTS)
sgen-regular-tests-ms-conc: $(SGEN_REGULAR_TESTS) test-runner.exe
MONO_ENV_OPTIONS="--gc=sgen" MONO_GC_DEBUG="" MONO_GC_PARAMS="major=marksweep-conc" $(RUNTIME) $(TEST_RUNNER) $(TEST_RUNNER_ARGS) --testsuite-name $@ --disabled "$(DISABLED_TESTS)" --timeout 900 $(SGEN_REGULAR_TESTS)
+sgen-regular-tests-ms-conc-par: $(SGEN_REGULAR_TESTS) test-runner.exe
+ MONO_ENV_OPTIONS="--gc=sgen" MONO_GC_DEBUG="" MONO_GC_PARAMS="major=marksweep-conc-par" $(RUNTIME) $(TEST_RUNNER) $(TEST_RUNNER_ARGS) --testsuite-name $@ --disabled "$(DISABLED_TESTS)" --timeout 900 $(SGEN_REGULAR_TESTS)
sgen-regular-tests-ms-conc-split: $(SGEN_REGULAR_TESTS) test-runner.exe
MONO_ENV_OPTIONS="--gc=sgen" MONO_GC_DEBUG="" MONO_GC_PARAMS="major=marksweep-conc,minor=split" $(RUNTIME) $(TEST_RUNNER) $(TEST_RUNNER_ARGS) --testsuite-name $@ --disabled "$(DISABLED_TESTS)" --timeout 900 $(SGEN_REGULAR_TESTS)
sgen-regular-tests-ms-split: $(SGEN_REGULAR_TESTS) test-runner.exe
#include "sgen-entry-stream.h"
#include "sgen-grep-binprot.h"
+static int file_version = 0;
+
#ifdef BINPROT_HAS_HEADER
#define PACKED_SUFFIX p
#else
#define MAX_ENTRY_SIZE (1 << 10)
static int
-read_entry (EntryStream *stream, void *data)
+read_entry (EntryStream *stream, void *data, unsigned char *windex)
{
unsigned char type;
ssize_t size;
if (read_stream (stream, &type, 1) <= 0)
return SGEN_PROTOCOL_EOF;
+
+ if (windex) {
+ if (file_version >= 2) {
+ if (read_stream (stream, windex, 1) <= 0)
+ return SGEN_PROTOCOL_EOF;
+ } else {
+ *windex = !!(WORKER (type));
+ }
+ }
+
switch (TYPE (type)) {
#define BEGIN_PROTOCOL_ENTRY0(method) \
}
}
-#define WORKER_PREFIX(t) (WORKER ((t)) ? "w" : " ")
-
enum { NO_COLOR = -1 };
typedef struct {
}
static void
-print_entry (int type, void *data, int num_nums, int *match_indices, gboolean color_output)
+print_entry (int type, void *data, int num_nums, int *match_indices, gboolean color_output, unsigned char worker_index)
{
const char *always_prefix = is_always_match (type) ? " " : "";
- printf ("%s%s ", WORKER_PREFIX (type), always_prefix);
+ if (worker_index)
+ printf ("w%-2d%s ", worker_index, always_prefix);
+ else
+ printf (" %s ", always_prefix);
switch (TYPE (type)) {
{
#ifdef BINPROT_HAS_HEADER
char data [MAX_ENTRY_SIZE];
- int type = read_entry (stream, data);
+ int type = read_entry (stream, data, NULL);
if (type == SGEN_PROTOCOL_EOF)
return FALSE;
if (type == PROTOCOL_ID (binary_protocol_header)) {
PROTOCOL_STRUCT (binary_protocol_header) * str = (PROTOCOL_STRUCT (binary_protocol_header) *) data;
- if (str->check == PROTOCOL_HEADER_CHECK && str->ptr_size == BINPROT_SIZEOF_VOID_P)
+ if (str->check == PROTOCOL_HEADER_CHECK && str->ptr_size == BINPROT_SIZEOF_VOID_P) {
+ if (str->version > PROTOCOL_HEADER_VERSION) {
+ fprintf (stderr, "The file contains a newer version %d. We support up to %d. Please update.\n", str->version, PROTOCOL_HEADER_VERSION);
+ exit (1);
+ }
+ file_version = str->version;
return TRUE;
+ }
}
return FALSE;
#else
gboolean dump_all, gboolean pause_times, gboolean color_output, unsigned long long first_entry_to_consider)
{
int type;
+ unsigned char worker_index;
void *data = g_malloc0 (MAX_ENTRY_SIZE);
int i;
gboolean pause_times_stopped = FALSE;
return FALSE;
entry_index = 0;
- while ((type = read_entry (stream, data)) != SGEN_PROTOCOL_EOF) {
+ while ((type = read_entry (stream, data, &worker_index)) != SGEN_PROTOCOL_EOF) {
if (entry_index < first_entry_to_consider)
goto next_entry;
if (pause_times) {
if (dump_all)
printf (match ? "* " : " ");
if (match || dump_all)
- print_entry (type, data, num_nums, match_indices, color_output);
+ print_entry (type, data, num_nums, match_indices, color_output, worker_index);
}
next_entry:
++entry_index;