Problems with the current code:
- It's wrong on 64-bit if align == 4.
- It's wrong on 64-bit if offset is not 0 or 4.
- It generates bad code if align > 1 && align < SIZEOF_VOID_P.
This PR implements unaligned access support based on the following set of assumptions, which hold for the current set of platforms we support:
- A load/store of size X works with data aligned to X; i.e., a short store can handle addresses aligned to 2.
- Unaligned access is either disallowed or plain slower.
- Both source and dest abide by the supplied alignment.
The way we emit unaligned access is by emitting the widest access permitted by the alignment and offset (see the sketch below).
TODO: use unaligned access on targets like x86 that support it, where it is profitable.
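To make the decomposition concrete, here is a minimal stand-alone model of that strategy, assuming a 64-bit target; `plan_memset` and its printed output are hypothetical names used for illustration, not part of the patch:

```c
#include <stdio.h>

/* Hypothetical model of the emission strategy, assuming a 64-bit target
 * (widest store is 8 bytes). Illustration only, not part of the patch. */
static void
plan_memset (int offset, int size, int align)
{
	/* Start at the widest power of two that both the alignment and the
	 * initial offset allow. Like the patch, this keys off the *initial*
	 * misalignment only and never widens again, even if the offset
	 * becomes better aligned as we advance. */
	int width = 8;
	while (width > 1 && ((align | offset) & (width - 1)))
		width /= 2;

	/* Fall through to narrower stores for the tail, mirroring the
	 * set_4/set_2/set_1 labels in the patch below. */
	for (; width >= 1; width /= 2)
		for (; size >= width; offset += width, size -= width)
			printf ("storei%d at offset %d\n", width, offset);
}

int
main (void)
{
	plan_memset (0, 14, 2); /* seven 2-byte stores */
	plan_memset (4, 12, 8); /* offset misaligned: three 4-byte stores */
	return 0;
}
```

The memcpy path below follows the same scheme, except the misalignment mask is computed over `(doffset | soffset)`, since the chosen width has to be safe for both the source and the destination.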
+ /*FIXME arbitrary hack to avoid unbound code expansion.*/
+ g_assert (size < 10000);
-
- if (align == 0)
- align = 4;
if ((size <= SIZEOF_REGISTER) && (size <= align)) {
switch (size) {
else
MONO_EMIT_NEW_ICONST (cfg, val_reg, val);
- if (align < 4) {
- /* This could be optimized further if neccesary */
- while (size >= 1) {
- MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, offset, val_reg);
- offset += 1;
- size -= 1;
- }
- return;
- }
-
- if (!cfg->backend->no_unaligned_access && SIZEOF_REGISTER == 8) {
- if (offset % 8) {
- MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, destreg, offset, val_reg);
- offset += 4;
- size -= 4;
- }
+ if (align < SIZEOF_VOID_P) {
+ if (align % 2 == 1)
+ goto set_1;
+ if (align % 4 == 2)
+ goto set_2;
+ if (SIZEOF_VOID_P == 8 && align % 8 == 4)
+ goto set_4;
+ }
+
+ //Unaligned offsets don't naturally happen in the runtime, so it's ok to be conservative in how we copy.
+ //We assume that the input src and dest are aligned to `align`, so the offset can only make the alignment worse.
+ int offsets_mask = offset & 0x7; //we only care about the misalignment part
+ if (offsets_mask) {
+ if (offsets_mask % 2 == 1)
+ goto set_1;
+ if (offsets_mask % 4 == 2)
+ goto set_2;
+ if (SIZEOF_VOID_P == 8 && offsets_mask % 8 == 4)
+ goto set_4;
+ }
+
+ if (SIZEOF_REGISTER == 8) {
while (size >= 8) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, destreg, offset, val_reg);
offset += 8;
size -= 8;
}
}
+
+ set_4:
while (size >= 4) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, destreg, offset, val_reg);
offset += 4;
size -= 4;
}
+
+ set_2:
while (size >= 2) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI2_MEMBASE_REG, destreg, offset, val_reg);
offset += 2;
size -= 2;
}
+
+ set_1:
while (size >= 1) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, offset, val_reg);
offset += 1;
size -= 1;
}
- if (align == 0)
- align = 4;
-
/*FIXME arbitrary hack to avoid unbound code expansion.*/
g_assert (size < 10000);
+ g_assert (align > 0);
+
+ if (align < SIZEOF_VOID_P) {
+ if (align == 4)
+ goto copy_4;
+ if (align == 2)
+ goto copy_2;
+ goto copy_1;
+ }
- if (align < 4) {
- /* This could be optimized further if neccesary */
- while (size >= 1) {
- cur_reg = alloc_preg (cfg);
- MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI1_MEMBASE, cur_reg, srcreg, soffset);
- MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, doffset, cur_reg);
- doffset += 1;
- soffset += 1;
- size -= 1;
- }
- return;
- }
+ //Unaligned offsets don't naturally happen in the runtime, so it's ok to be conservative in how we copy.
+ //We assume that the input src and dest are aligned to `align`, so the offset can only make the alignment worse.
+ int offsets_mask = (doffset | soffset) & 0x7; //we only care about the misalignment part
+ if (offsets_mask) {
+ if (offsets_mask % 2 == 1)
+ goto copy_1;
+ if (offsets_mask % 4 == 2)
+ goto copy_2;
+ if (SIZEOF_VOID_P == 8 && offsets_mask % 8 == 4)
+ goto copy_4;
+ }
- if (!cfg->backend->no_unaligned_access && SIZEOF_REGISTER == 8) {
+
+ if (SIZEOF_REGISTER == 8) {
while (size >= 8) {
cur_reg = alloc_preg (cfg);
MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI8_MEMBASE, cur_reg, srcreg, soffset);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, destreg, doffset, cur_reg);
doffset += 8;
soffset += 8;
size -= 8;
}
}
+
+ copy_4:
while (size >= 4) {
cur_reg = alloc_preg (cfg);
MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI4_MEMBASE, cur_reg, srcreg, soffset);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, destreg, doffset, cur_reg);
doffset += 4;
soffset += 4;
size -= 4;
}
+
+ copy_2:
while (size >= 2) {
cur_reg = alloc_preg (cfg);
MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI2_MEMBASE, cur_reg, srcreg, soffset);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI2_MEMBASE_REG, destreg, doffset, cur_reg);
doffset += 2;
soffset += 2;
size -= 2;
}
+
+ copy_1:
while (size >= 1) {
cur_reg = alloc_preg (cfg);
MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADI1_MEMBASE, cur_reg, srcreg, soffset);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI1_MEMBASE_REG, destreg, doffset, cur_reg);
doffset += 1;
soffset += 1;
size -= 1;
}