From abd1bd5d8f633cfdc43570433ed416145e887250 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Wed, 21 Jun 2017 23:08:07 +0300 Subject: [PATCH] [sgen] Fix MS block size calculation. The block size must be a multiple of the system page size. Page size simply cannot be determined reliably at compile time, so we must obtain the page size through a system call on startup and use it as the block size. We still enforce a minimum block size of 16kb on systems where the page size is smaller than that. The old code hardcoded page sizes for certain architectures with large pages. THe problem is that those architectures don't necessarily have to have large pages and on systems where this wasn't the case, we'd end up with a block size much larger than needed. Rebase of @alexrp's work. --- mono/sgen/sgen-archdep.h | 7 --- mono/sgen/sgen-marksweep.c | 114 +++++++++++++++++++++---------------- 2 files changed, 65 insertions(+), 56 deletions(-) diff --git a/mono/sgen/sgen-archdep.h b/mono/sgen/sgen-archdep.h index 274f6dac2fd..9fa6e2141c2 100644 --- a/mono/sgen/sgen-archdep.h +++ b/mono/sgen/sgen-archdep.h @@ -39,13 +39,6 @@ #define REDZONE_SIZE 224 -/* MS_BLOCK_SIZE must be a multiple of the system pagesize, which for some - architectures is 64k. */ -#if defined(TARGET_POWERPC) || defined(TARGET_POWERPC64) -#define ARCH_MIN_MS_BLOCK_SIZE (64*1024) -#define ARCH_MIN_MS_BLOCK_SIZE_SHIFT 16 -#endif - #elif defined(TARGET_ARM) #define REDZONE_SIZE 0 diff --git a/mono/sgen/sgen-marksweep.c b/mono/sgen/sgen-marksweep.c index 12b06055aff..e496288a2e7 100644 --- a/mono/sgen/sgen-marksweep.c +++ b/mono/sgen/sgen-marksweep.c @@ -33,15 +33,15 @@ #include "mono/sgen/sgen-client.h" #include "mono/utils/mono-memory-model.h" -#if defined(ARCH_MIN_MS_BLOCK_SIZE) && defined(ARCH_MIN_MS_BLOCK_SIZE_SHIFT) -#define MS_BLOCK_SIZE ARCH_MIN_MS_BLOCK_SIZE -#define MS_BLOCK_SIZE_SHIFT ARCH_MIN_MS_BLOCK_SIZE_SHIFT -#else -#define MS_BLOCK_SIZE_SHIFT 14 /* INT FASTENABLE */ -#define MS_BLOCK_SIZE (1 << MS_BLOCK_SIZE_SHIFT) -#endif -#define MAJOR_SECTION_SIZE MS_BLOCK_SIZE -#define CARDS_PER_BLOCK (MS_BLOCK_SIZE / CARD_SIZE_IN_BYTES) +static int ms_block_size; + +/* + * Blocks must be at least this size, meaning that if we detect a + * page size lower than this, we'll use this instead. + */ +#define MS_BLOCK_SIZE_MIN (1024 * 16) + +#define CARDS_PER_BLOCK (ms_block_size / CARD_SIZE_IN_BYTES) /* * Don't allocate single blocks, but alloc a contingent of this many @@ -49,16 +49,22 @@ */ #define MS_BLOCK_ALLOC_NUM 32 +#define MS_NUM_MARK_WORDS ((ms_block_size / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8)) + +/* + * Use this instead of sizeof (MSBlockInfo) since the mark_words + * array size depends on page size at runtime. + */ +#define SIZEOF_MS_BLOCK_INFO (sizeof (MSBlockInfo) + sizeof (guint32) * (MS_NUM_MARK_WORDS - MONO_ZERO_LEN_ARRAY)) + /* * Number of bytes before the first object in a block. At the start * of a block is the MSBlockHeader, then opional padding, then come - * the objects, so this must be >= sizeof (MSBlockHeader). + * the objects, so this must be >= SIZEOF_MS_BLOCK_INFO. */ -#define MS_BLOCK_SKIP ((sizeof (MSBlockHeader) + 15) & ~15) +#define MS_BLOCK_SKIP ((SIZEOF_MS_BLOCK_INFO + 15) & ~15) -#define MS_BLOCK_FREE (MS_BLOCK_SIZE - MS_BLOCK_SKIP) - -#define MS_NUM_MARK_WORDS (MS_BLOCK_SIZE / SGEN_ALLOC_ALIGN + sizeof (guint32) * 8 - 1) / (sizeof (guint32) * 8) +#define MS_BLOCK_FREE (ms_block_size - MS_BLOCK_SKIP) /* * Blocks progress from one state to the next: @@ -108,14 +114,14 @@ struct _MSBlockInfo { void ** volatile free_list; MSBlockInfo * volatile next_free; guint8 * volatile cardtable_mod_union; - guint32 mark_words [MS_NUM_MARK_WORDS]; + guint32 mark_words [MONO_ZERO_LEN_ARRAY]; }; #define MS_BLOCK_FOR_BLOCK_INFO(b) ((char*)(b)) #define MS_BLOCK_OBJ(b,i) ((GCObject *)(MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (b)->obj_size * (i))) #define MS_BLOCK_OBJ_FOR_SIZE(b,i,obj_size) (MS_BLOCK_FOR_BLOCK_INFO(b) + MS_BLOCK_SKIP + (obj_size) * (i)) -#define MS_BLOCK_DATA_FOR_OBJ(o) ((char*)((mword)(o) & ~(mword)(MS_BLOCK_SIZE - 1))) +#define MS_BLOCK_DATA_FOR_OBJ(o) ((char*)((mword)(o) & ~(mword)(ms_block_size - 1))) typedef struct { MSBlockInfo info; @@ -150,7 +156,7 @@ typedef struct { } while (0) -#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + MS_BLOCK_SIZE)) +#define MS_OBJ_ALLOCED(o,b) (*(void**)(o) && (*(char**)(o) < MS_BLOCK_FOR_BLOCK_INFO (b) || *(char**)(o) >= MS_BLOCK_FOR_BLOCK_INFO (b) + ms_block_size)) #define MS_BLOCK_OBJ_SIZE_FACTOR (pow (2.0, 1.0 / 3)) @@ -320,7 +326,7 @@ major_alloc_heap (mword nursery_size, mword nursery_align) static void update_heap_boundaries_for_block (MSBlockInfo *block) { - sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE); + sgen_update_heap_boundaries ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), (mword)MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size); } /* @@ -342,7 +348,7 @@ ms_get_empty_block (void) */ int alloc_num = MS_BLOCK_ALLOC_NUM; for (;;) { - p = (char *)sgen_alloc_os_memory_aligned (MS_BLOCK_SIZE * alloc_num, MS_BLOCK_SIZE, + p = (char *)sgen_alloc_os_memory_aligned (ms_block_size * alloc_num, ms_block_size, (SgenAllocFlags)(SGEN_ALLOC_HEAP | SGEN_ALLOC_ACTIVATE), alloc_num == 1 ? "major heap section" : NULL, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); if (p) @@ -361,7 +367,7 @@ ms_get_empty_block (void) empty = empty_blocks; *(void**)block = empty; } while (SGEN_CAS_PTR ((gpointer*)&empty_blocks, block, empty) != empty); - p += MS_BLOCK_SIZE; + p += ms_block_size; } SGEN_ATOMIC_ADD_P (num_empty_blocks, alloc_num); @@ -385,7 +391,7 @@ ms_get_empty_block (void) *(void**)block = NULL; - g_assert (!((mword)block & (MS_BLOCK_SIZE - 1))); + g_assert (!((mword)block & (ms_block_size - 1))); return block; } @@ -400,10 +406,10 @@ ms_free_block (MSBlockInfo *info) void *empty; char *block = MS_BLOCK_FOR_BLOCK_INFO (info); - sgen_memgov_release_space (MS_BLOCK_SIZE, SPACE_MAJOR); + sgen_memgov_release_space (ms_block_size, SPACE_MAJOR); if (info->cardtable_mod_union) - sgen_card_table_free_mod_union (info->cardtable_mod_union, block, MS_BLOCK_SIZE); - memset (block, 0, MS_BLOCK_SIZE); + sgen_card_table_free_mod_union (info->cardtable_mod_union, block, ms_block_size); + memset (block, 0, ms_block_size); do { empty = empty_blocks; @@ -412,7 +418,7 @@ ms_free_block (MSBlockInfo *info) SGEN_ATOMIC_ADD_P (num_empty_blocks, 1); - binary_protocol_block_free (block, MS_BLOCK_SIZE); + binary_protocol_block_free (block, ms_block_size); } static gboolean @@ -527,7 +533,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references) char *obj_start; int i; - if (!sgen_memgov_try_alloc_space (MS_BLOCK_SIZE, SPACE_MAJOR)) + if (!sgen_memgov_try_alloc_space (ms_block_size, SPACE_MAJOR)) return FALSE; info = (MSBlockInfo*)ms_get_empty_block (); @@ -553,7 +559,7 @@ ms_alloc_block (int size_index, gboolean pinned, gboolean has_references) update_heap_boundaries_for_block (info); - binary_protocol_block_alloc (info, MS_BLOCK_SIZE); + binary_protocol_block_alloc (info, ms_block_size); /* build free list */ obj_start = MS_BLOCK_FOR_BLOCK_INFO (info) + MS_BLOCK_SKIP; @@ -581,7 +587,7 @@ ptr_is_in_major_block (char *ptr, char **start, gboolean *pinned) MSBlockInfo *block; FOREACH_BLOCK_NO_LOCK (block) { - if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) { + if (ptr >= MS_BLOCK_FOR_BLOCK_INFO (block) && ptr <= MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) { int count = MS_BLOCK_FREE / block->obj_size; int i; @@ -966,7 +972,7 @@ major_is_valid_object (char *object) int idx; char *obj; - if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= object)) + if ((MS_BLOCK_FOR_BLOCK_INFO (block) > object) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= object)) continue; idx = MS_BLOCK_OBJ_INDEX (object, block); @@ -993,7 +999,7 @@ major_describe_pointer (char *ptr) int w, b; gboolean marked; - if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE) <= ptr)) + if ((MS_BLOCK_FOR_BLOCK_INFO (block) > ptr) || ((MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size) <= ptr)) continue; SGEN_LOG (0, "major-ptr (block %p sz %d pin %d ref %d)\n", @@ -1094,13 +1100,13 @@ get_cardtable_mod_union_for_block (MSBlockInfo *block, gboolean allocate) return mod_union; else if (!allocate) return NULL; - mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + mod_union = sgen_card_table_alloc_mod_union (MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); other = (guint8 *)SGEN_CAS_PTR ((gpointer*)&block->cardtable_mod_union, mod_union, NULL); if (!other) { SGEN_ASSERT (0, block->cardtable_mod_union == mod_union, "Why did CAS not replace?"); return mod_union; } - sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + sgen_card_table_free_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); return other; } @@ -1435,7 +1441,7 @@ try_set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state gint32 old_state = SGEN_CAS (&block->state, new_state, expected_state); gboolean success = old_state == expected_state; if (success) - binary_protocol_block_set_state (block, MS_BLOCK_SIZE, old_state, new_state); + binary_protocol_block_set_state (block, ms_block_size, old_state, new_state); return success; } @@ -1444,7 +1450,7 @@ set_block_state (MSBlockInfo *block, gint32 new_state, gint32 expected_state) { SGEN_ASSERT (6, block->state == expected_state, "Block state incorrect before set"); block->state = new_state; - binary_protocol_block_set_state (block, MS_BLOCK_SIZE, expected_state, new_state); + binary_protocol_block_set_state (block, ms_block_size, expected_state, new_state); } /* @@ -2201,7 +2207,7 @@ major_free_swept_blocks (size_t section_reserve) SGEN_ASSERT (6, first >= 0 && d > first, "algorithm is wrong"); - if ((char*)block != ((char*)empty_block_arr [d-1]) + MS_BLOCK_SIZE) { + if ((char*)block != ((char*)empty_block_arr [d-1]) + ms_block_size) { first = d; continue; } @@ -2215,7 +2221,7 @@ major_free_swept_blocks (size_t section_reserve) * we're iterating. */ int j; - sgen_free_os_memory (empty_block_arr [first], MS_BLOCK_SIZE * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); + sgen_free_os_memory (empty_block_arr [first], ms_block_size * num_blocks, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); for (j = first; j <= d; ++j) empty_block_arr [j] = NULL; dest = first; @@ -2265,7 +2271,7 @@ major_free_swept_blocks (size_t section_reserve) while (num_empty_blocks > section_reserve) { void *next = *(void**)empty_blocks; - sgen_free_os_memory (empty_blocks, MS_BLOCK_SIZE, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); + sgen_free_os_memory (empty_blocks, ms_block_size, SGEN_ALLOC_HEAP, MONO_MEM_ACCOUNT_SGEN_MARKSWEEP); empty_blocks = next; /* * Needs not be atomic because this is running @@ -2286,7 +2292,7 @@ major_pin_objects (SgenGrayQueue *queue) FOREACH_BLOCK_NO_LOCK (block) { size_t first_entry, last_entry; SGEN_ASSERT (6, block_is_swept_or_marking (block), "All blocks must be swept when we're pinning."); - sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SIZE, + sgen_find_optimized_pin_queue_area (MS_BLOCK_FOR_BLOCK_INFO (block) + MS_BLOCK_SKIP, MS_BLOCK_FOR_BLOCK_INFO (block) + ms_block_size, &first_entry, &last_entry); mark_pinned_objects_in_block (block, first_entry, last_entry, queue); } END_FOREACH_BLOCK_NO_LOCK; @@ -2342,7 +2348,7 @@ static size_t get_bytes_survived_last_sweep (void) { SGEN_ASSERT (0, sweep_state == SWEEP_STATE_SWEPT, "Can only query unswept sections after sweep"); - return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * MS_BLOCK_SIZE; + return (num_major_sections_before_sweep - num_major_sections_freed_in_sweep) * ms_block_size; } static gboolean @@ -2396,7 +2402,7 @@ major_iterate_block_ranges (sgen_cardtable_block_callback callback) FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) { if (has_references) - callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); } END_FOREACH_BLOCK_NO_LOCK; } @@ -2409,7 +2415,7 @@ major_iterate_live_block_ranges (sgen_cardtable_block_callback callback) major_finish_sweep_checking (); FOREACH_BLOCK_HAS_REFERENCES_NO_LOCK (block, has_references) { if (has_references) - callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE); + callback ((mword)MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size); } END_FOREACH_BLOCK_NO_LOCK; } @@ -2430,7 +2436,7 @@ static guint8* initial_skip_card (guint8 *card_data) { mword *cards = (mword*)card_data; - mword card; + mword card = 0; int i; for (i = 0; i < CARD_WORDS_PER_BLOCK; ++i) { card = cards [i]; @@ -2458,17 +2464,22 @@ initial_skip_card (guint8 *card_data) #define MS_BLOCK_OBJ_INDEX_FAST(o,b,os) (((char*)(o) - ((b) + MS_BLOCK_SKIP)) / (os)) #define MS_BLOCK_OBJ_FAST(b,os,i) ((b) + MS_BLOCK_SKIP + (os) * (i)) -#define MS_OBJ_ALLOCED_FAST(o,b) (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + MS_BLOCK_SIZE)) +#define MS_OBJ_ALLOCED_FAST(o,b) (*(void**)(o) && (*(char**)(o) < (b) || *(char**)(o) >= (b) + ms_block_size)) static void scan_card_table_for_block (MSBlockInfo *block, CardTableScanType scan_type, ScanCopyContext ctx) { SgenGrayQueue *queue = ctx.queue; ScanObjectFunc scan_func = ctx.ops->scan_object; + /* + * FIXME: On systems with very large pages, we allocate fairly large + * arrays on the stack here. This shouldn't be a problem once block + * size is no longer required to be a multiple of the system page size. + */ #ifndef SGEN_HAVE_OVERLAPPING_CARDS - guint8 cards_copy [CARDS_PER_BLOCK]; + guint8 *cards_copy = alloca (sizeof (guint8) * CARDS_PER_BLOCK); #endif - guint8 cards_preclean [CARDS_PER_BLOCK]; + guint8 *cards_preclean = alloca (sizeof (guint8) * CARDS_PER_BLOCK); gboolean small_objects; int block_obj_size; char *block_start; @@ -2711,7 +2722,7 @@ update_cardtable_mod_union (void) if (has_dirty_cards) { size_t num_cards; guint8 *mod_union = get_cardtable_mod_union_for_block (block, TRUE); - sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), MS_BLOCK_SIZE, &num_cards); + sgen_card_table_update_mod_union (mod_union, MS_BLOCK_FOR_BLOCK_INFO (block), ms_block_size, &num_cards); SGEN_ASSERT (6, num_cards == CARDS_PER_BLOCK, "Number of cards calculation is wrong"); } } END_FOREACH_BLOCK_NO_LOCK; @@ -2752,7 +2763,12 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr { int i; - sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, sizeof (MSBlockInfo)); + ms_block_size = mono_pagesize (); + + if (ms_block_size < MS_BLOCK_SIZE_MIN) + ms_block_size = MS_BLOCK_SIZE_MIN; + + sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, SIZEOF_MS_BLOCK_INFO); num_block_obj_sizes = ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, NULL); block_obj_sizes = (int *)sgen_alloc_internal_dynamic (sizeof (int) * num_block_obj_sizes, INTERNAL_MEM_MS_TABLES, TRUE); @@ -2797,7 +2813,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr mono_counters_register ("# major blocks freed individually", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_freed_individual); mono_counters_register ("# major blocks allocated less ideally", MONO_COUNTER_GC | MONO_COUNTER_ULONG, &stat_major_blocks_alloced_less_ideal); - collector->section_size = MAJOR_SECTION_SIZE; + collector->section_size = ms_block_size; concurrent_mark = is_concurrent; collector->is_concurrent = is_concurrent; @@ -2906,7 +2922,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr SGEN_ASSERT (0, SGEN_MAX_SMALL_OBJ_SIZE <= MS_BLOCK_FREE / 2, "MAX_SMALL_OBJ_SIZE must be at most MS_BLOCK_FREE / 2"); /*cardtable requires major pages to be 8 cards aligned*/ - g_assert ((MS_BLOCK_SIZE % (8 * CARD_SIZE_IN_BYTES)) == 0); + g_assert ((ms_block_size % (8 * CARD_SIZE_IN_BYTES)) == 0); if (concurrent_sweep) { SgenThreadPool **thread_datas = &sweep_pool; -- 2.25.1