From 32eb3111eebc961d98664e6ceb98dd0f1e16633b Mon Sep 17 00:00:00 2001
From: Rodrigo Kumpera
Date: Wed, 10 May 2017 13:38:40 -0700
Subject: [PATCH] [runtime] Rework how unaligned memcpy/memset are generated.

Problems with the current code:

It's wrong on 64 bits if align == 4.
It's wrong on 64 bits if offset is not 0 or 4.
It generates bad code if align > 1 && align < SIZEOF_VOID_P.

This PR implements unaligned access based on the following set of
assumptions, which hold for the current set of platforms we support:

- A load/store of size X works with data aligned on X. I.e., a short
  store can handle addresses aligned to 2.
- Unaligned access is either disallowed or simply slower.
- Both source and dest abide by the supplied alignment.

We emit unaligned access by emitting the widest access allowed by the
alignment and the offset.

TODO: use unaligned access on targets, such as x86, that support it and
where it is profitable.
---
 mono/mini/memory-access.c | 94 ++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 35 deletions(-)

diff --git a/mono/mini/memory-access.c b/mono/mini/memory-access.c
index f20d6790eea..34edb52b338 100644
--- a/mono/mini/memory-access.c
+++ b/mono/mini/memory-access.c
@@ -20,10 +20,10 @@ mini_emit_memset (MonoCompile *cfg, int destreg, int offset, int size, int val,
 {
 	int val_reg;
 
+	/*FIXME arbitrary hack to avoid unbound code expansion.*/
+	g_assert (size < 10000);
 	g_assert (val == 0);
-
-	if (align == 0)
-		align = 4;
+	g_assert (align > 0);
 
 	if ((size <= SIZEOF_REGISTER) && (size <= align)) {
 		switch (size) {
@@ -51,39 +51,51 @@ mini_emit_memset (MonoCompile *cfg, int destreg, int offset, int size, int val,
 	else
 		MONO_EMIT_NEW_ICONST (cfg, val_reg, val);
 
-	if (align < 4) {
-		/* This could be optimized further if neccesary */
-		while (size >= 1) {
-			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, offset, val_reg);
-			offset += 1;
-			size -= 1;
-		}
-		return;
-	}
-
-	if (!cfg->backend->no_unaligned_access && SIZEOF_REGISTER == 8) {
-		if (offset % 8) {
-			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, destreg, offset, val_reg);
-			offset += 4;
-			size -= 4;
-		}
+	if (align < SIZEOF_VOID_P) {
+		if (align % 2 == 1)
+			goto set_1;
+		if (align % 4 == 2)
+			goto set_2;
+		if (SIZEOF_VOID_P == 8 && align % 8 == 4)
+			goto set_4;
+	}
+
+	//Unaligned offsets don't naturally happen in the runtime, so it's ok to be conservative in how we copy
+	//We assume that input src and dest are aligned to `align`, so the offset can only worsen it
+	int offsets_mask = offset & 0x7; //we only care about the misalignment part
+	if (offsets_mask) {
+		if (offsets_mask % 2 == 1)
+			goto set_1;
+		if (offsets_mask % 4 == 2)
+			goto set_2;
+		if (SIZEOF_VOID_P == 8 && offsets_mask % 8 == 4)
+			goto set_4;
+	}
+
+	if (SIZEOF_REGISTER == 8) {
 		while (size >= 8) {
 			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, destreg, offset, val_reg);
 			offset += 8;
 			size -= 8;
 		}
-	}
+	}
 
+set_4:
 	while (size >= 4) {
 		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, destreg, offset, val_reg);
 		offset += 4;
 		size -= 4;
 	}
+
+set_2:
 	while (size >= 2) {
 		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI2_MEMBASE_REG, destreg, offset, val_reg);
 		offset += 2;
 		size -= 2;
 	}
+
+set_1:
 	while (size >= 1) {
 		MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, offset, val_reg);
 		offset += 1;
@@ -96,25 +108,32 @@ mini_emit_memcpy (MonoCompile *cfg, int destreg, int doffset, int srcreg, int so
 {
 	int cur_reg;
 
-	if (align == 0)
-		align = 4;
-
 	/*FIXME arbitrary hack to avoid unbound code expansion.*/
 	g_assert (size < 10000);
+	g_assert (align > 0);
+
+	if (align < SIZEOF_VOID_P) {
+		if (align == 4)
+			goto copy_4;
+		if (align == 2)
+			goto copy_2;
+		goto copy_1;
+	}
 
-	if (align < 4) {
-		/* This could be optimized further if neccesary */
-		while (size >= 1) {
-			cur_reg = alloc_preg (cfg);
-			MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI1_MEMBASE, cur_reg, srcreg, soffset);
-			MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, doffset, cur_reg);
-			doffset += 1;
-			soffset += 1;
-			size -= 1;
-		}
+	//Unaligned offsets don't naturally happen in the runtime, so it's ok to be conservative in how we copy
+	//We assume that input src and dest are aligned to `align`, so the offsets can only worsen it
+	int offsets_mask = (doffset | soffset) & 0x7; //we only care about the misalignment part
+	if (offsets_mask) {
+		if (offsets_mask % 2 == 1)
+			goto copy_1;
+		if (offsets_mask % 4 == 2)
+			goto copy_2;
+		if (SIZEOF_VOID_P == 8 && offsets_mask % 8 == 4)
+			goto copy_4;
 	}
 
-	if (!cfg->backend->no_unaligned_access && SIZEOF_REGISTER == 8) {
+
+	if (SIZEOF_REGISTER == 8) {
 		while (size >= 8) {
 			cur_reg = alloc_preg (cfg);
 			MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI8_MEMBASE, cur_reg, srcreg, soffset);
@@ -123,8 +142,9 @@ mini_emit_memcpy (MonoCompile *cfg, int destreg, int doffset, int srcreg, int so
 			soffset += 8;
 			size -= 8;
 		}
-	}
+	}
 
+copy_4:
 	while (size >= 4) {
 		cur_reg = alloc_preg (cfg);
 		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI4_MEMBASE, cur_reg, srcreg, soffset);
@@ -133,6 +153,8 @@ mini_emit_memcpy (MonoCompile *cfg, int destreg, int doffset, int srcreg, int so
 		soffset += 4;
 		size -= 4;
 	}
+
+copy_2:
 	while (size >= 2) {
 		cur_reg = alloc_preg (cfg);
 		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI2_MEMBASE, cur_reg, srcreg, soffset);
@@ -141,6 +163,8 @@ mini_emit_memcpy (MonoCompile *cfg, int destreg, int doffset, int srcreg, int so
 		soffset += 2;
 		size -= 2;
 	}
+
+copy_1:
 	while (size >= 1) {
 		cur_reg = alloc_preg (cfg);
 		MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI1_MEMBASE, cur_reg, srcreg, soffset);
--
2.25.1
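
Note for reviewers (not part of the patch): the width-selection dispatch both
functions now share can be modeled in plain C. The sketch below is
illustrative only; the helper names (max_width, sketch_memset0) are
hypothetical, and it assumes, as the commit message does, that an access of
size X only needs X-byte alignment and that dest really is aligned to `align`.

#include <stdint.h>

/* Widest power-of-two access width (capped at 8) that `v` is a multiple of;
 * mirrors the % 2 / % 4 / % 8 checks in the patch. Note max_width (0) == 8,
 * so a zero misalignment imposes no cap. */
static int
max_width (int v)
{
	if (v % 2 == 1)
		return 1;
	if (v % 4 == 2)
		return 2;
	if (v % 8 == 4)
		return 4;
	return 8;
}

/* Host-side model of mini_emit_memset with val == 0: take the widest width
 * both `align` and the misaligned part of `offset` allow, then fall through
 * to narrower stores for the tail, like the set_4/set_2/set_1 labels.
 * (The JIT also skips the 8-byte loop unless SIZEOF_REGISTER == 8; that
 * target detail is ignored here. The casts are safe only because the
 * dispatch guarantees `dest + offset` is width-aligned.) */
static void
sketch_memset0 (unsigned char *dest, int offset, int size, int align)
{
	int width = max_width (align);
	int off_width = max_width (offset & 0x7); /* only misalignment matters */
	if (off_width < width)
		width = off_width;

	if (width >= 8)
		for (; size >= 8; offset += 8, size -= 8)
			*(uint64_t *)(dest + offset) = 0; /* ~ OP_STOREI8_MEMBASE_REG */
	if (width >= 4)
		for (; size >= 4; offset += 4, size -= 4)
			*(uint32_t *)(dest + offset) = 0; /* ~ OP_STOREI4_MEMBASE_REG */
	if (width >= 2)
		for (; size >= 2; offset += 2, size -= 2)
			*(uint16_t *)(dest + offset) = 0; /* ~ OP_STOREI2_MEMBASE_REG */
	for (; size >= 1; offset += 1, size -= 1)
		dest[offset] = 0; /* ~ OP_STOREI1_MEMBASE_REG */
}

For example, with align == 4 on a 64-bit target, width resolves to 4, so only
4-, 2- and 1-byte stores are issued; the old code would have emitted 8-byte
stores against merely 4-aligned data, which is the first bug listed above.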