Last batch of cleanups before landing the actual code for unaligned memory access.
#include "mini.h"
#include "ir-emit.h"
+#define MAX_INLINE_COPIES 10
+
void
mini_emit_memset (MonoCompile *cfg, int destreg, int offset, int size, int val, int align)
{
}
}
+static void
+mini_emit_memcpy_internal (MonoCompile *cfg, MonoInst *dest, MonoInst *src, MonoInst *size_ins, int size, int align)
+{
+ /* FIXME: Optimize the case when src/dest is OP_LDADDR */
+
+ /* We can't do copies at a smaller granule than the provided alignment */
+ if (size_ins || ((size / align > MAX_INLINE_COPIES) && !(cfg->opt & MONO_OPT_INTRINS))) {
+ MonoInst *iargs [3];
+ iargs [0] = dest;
+ iargs [1] = src;
+
+ if (!size_ins)
+ EMIT_NEW_ICONST (cfg, size_ins, size);
+ iargs [2] = size_ins;
+ mono_emit_method_call (cfg, mini_get_memcpy_method (), iargs, NULL);
+ } else {
+ mini_emit_memcpy (cfg, dest->dreg, 0, src->dreg, 0, size, align);
+ }
+}
+
+static void
+mini_emit_memset_internal (MonoCompile *cfg, MonoInst *dest, MonoInst *value_ins, int value, MonoInst *size_ins, int size, int align)
+{
+ /* FIXME: Optimize the case when dest is OP_LDADDR */
+
+ /* We can't do copies at a smaller granule than the provided alignment */
+ if (value_ins || size_ins || value != 0 || ((size / align > MAX_INLINE_COPIES) && !(cfg->opt & MONO_OPT_INTRINS))) {
+ MonoInst *iargs [3];
+ iargs [0] = dest;
+
+ if (!value_ins)
+ EMIT_NEW_ICONST (cfg, value_ins, value);
+ iargs [1] = value_ins;
+
+ if (!size_ins)
+ EMIT_NEW_ICONST (cfg, size_ins, size);
+ iargs [2] = size_ins;
+
+ mono_emit_method_call (cfg, mini_get_memset_method (), iargs, NULL);
+ } else {
+ mini_emit_memset (cfg, dest->dreg, 0, size, value, align);
+ }
+}
+
/*
 * mini_emit_memcpy_const_size:
 *
 *   Emit a copy of SIZE bytes (a compile-time constant) from SRC to DEST,
 * both assumed to be at least ALIGN-byte aligned. Thin wrapper over
 * mini_emit_memcpy_internal with no dynamic size instruction.
 */
static void
mini_emit_memcpy_const_size (MonoCompile *cfg, MonoInst *dest, MonoInst *src, int size, int align)
{
	mini_emit_memcpy_internal (cfg, dest, src, NULL, size, align);
}
+
/*
 * mini_emit_memset_const_size:
 *
 *   Emit code filling SIZE bytes (a compile-time constant) at DEST with the
 * constant byte VALUE, DEST assumed to be at least ALIGN-byte aligned. Thin
 * wrapper over mini_emit_memset_internal with no dynamic value/size
 * instructions.
 */
static void
mini_emit_memset_const_size (MonoCompile *cfg, MonoInst *dest, int value, int size, int align)
{
	mini_emit_memset_internal (cfg, dest, NULL, value, NULL, size, align);
}
+
MonoInst*
mini_emit_memory_load (MonoCompile *cfg, MonoType *type, MonoInst *src, int offset, int ins_flag)
{
}
}
+void
+mini_emit_memory_copy_bytes (MonoCompile *cfg, MonoInst *dest, MonoInst *src, MonoInst *size, int ins_flag)
+{
+ int align = SIZEOF_VOID_P;
+
+ /*
+ * FIXME: It's unclear whether we should be emitting both the acquire
+ * and release barriers for cpblk. It is technically both a load and
+ * store operation, so it seems like that's the sensible thing to do.
+ *
+ * FIXME: We emit full barriers on both sides of the operation for
+ * simplicity. We should have a separate atomic memcpy method instead.
+ */
+ if (ins_flag & MONO_INST_VOLATILE) {
+ /* Volatile loads have acquire semantics, see 12.6.7 in Ecma 335 */
+ mini_emit_memory_barrier (cfg, MONO_MEMORY_BARRIER_SEQ);
+ }
+
+ if ((cfg->opt & MONO_OPT_INTRINS) && (size->opcode == OP_ICONST)) {
+ mini_emit_memcpy_const_size (cfg, dest, src, size->inst_c0, align);
+ } else {
+ if (cfg->verbose_level > 3)
+ printf ("EMITING REGULAR COPY\n");
+ mini_emit_memcpy_internal (cfg, dest, src, size, 0, align);
+ }
+
+ if (ins_flag & MONO_INST_VOLATILE) {
+ /* Volatile loads have acquire semantics, see 12.6.7 in Ecma 335 */
+ mini_emit_memory_barrier (cfg, MONO_MEMORY_BARRIER_SEQ);
+ }
+}
+
+void
+mini_emit_memory_init_bytes (MonoCompile *cfg, MonoInst *dest, MonoInst *value, MonoInst *size, int ins_flag)
+{
+ int align = SIZEOF_VOID_P;
+
+ if (ins_flag & MONO_INST_VOLATILE) {
+ /* Volatile stores have release semantics, see 12.6.7 in Ecma 335 */
+ mini_emit_memory_barrier (cfg, MONO_MEMORY_BARRIER_REL);
+ }
+
+ //FIXME unrolled memset only supports zeroing
+ if ((cfg->opt & MONO_OPT_INTRINS) && (size->opcode == OP_ICONST) && (value->opcode == OP_ICONST) && (value->inst_c0 == 0)) {
+ mini_emit_memset_const_size (cfg, dest, value->inst_c0, size->inst_c0, align);
+ } else {
+ mini_emit_memset_internal (cfg, dest, value, 0, size, 0, align);
+ }
+
+}
+
#endif
ip += 6;
break;
case CEE_CPBLK:
- case CEE_INITBLK: {
- MonoInst *iargs [3];
CHECK_STACK (3);
sp -= 3;
-
- /* Skip optimized paths for volatile operations. */
- if ((ip [1] == CEE_CPBLK) && !(ins_flag & MONO_INST_VOLATILE) && (cfg->opt & MONO_OPT_INTRINS) && (sp [2]->opcode == OP_ICONST) && ((n = sp [2]->inst_c0) <= sizeof (gpointer) * 5)) {
- mini_emit_memcpy (cfg, sp [0]->dreg, 0, sp [1]->dreg, 0, sp [2]->inst_c0, 0);
- } else if ((ip [1] == CEE_INITBLK) && !(ins_flag & MONO_INST_VOLATILE) && (cfg->opt & MONO_OPT_INTRINS) && (sp [2]->opcode == OP_ICONST) && ((n = sp [2]->inst_c0) <= sizeof (gpointer) * 5) && (sp [1]->opcode == OP_ICONST) && (sp [1]->inst_c0 == 0)) {
- /* emit_memset only works when val == 0 */
- mini_emit_memset (cfg, sp [0]->dreg, 0, sp [2]->inst_c0, sp [1]->inst_c0, 0);
- } else {
- MonoInst *call;
- iargs [0] = sp [0];
- iargs [1] = sp [1];
- iargs [2] = sp [2];
- if (ip [1] == CEE_CPBLK) {
- /*
- * FIXME: It's unclear whether we should be emitting both the acquire
- * and release barriers for cpblk. It is technically both a load and
- * store operation, so it seems like that's the sensible thing to do.
- *
- * FIXME: We emit full barriers on both sides of the operation for
- * simplicity. We should have a separate atomic memcpy method instead.
- */
- MonoMethod *memcpy_method = mini_get_memcpy_method ();
-
- if (ins_flag & MONO_INST_VOLATILE)
- mini_emit_memory_barrier (cfg, MONO_MEMORY_BARRIER_SEQ);
-
- call = mono_emit_method_call (cfg, memcpy_method, iargs, NULL);
- call->flags |= ins_flag;
-
- if (ins_flag & MONO_INST_VOLATILE)
- mini_emit_memory_barrier (cfg, MONO_MEMORY_BARRIER_SEQ);
- } else {
- MonoMethod *memset_method = mini_get_memset_method ();
- if (ins_flag & MONO_INST_VOLATILE) {
- /* Volatile stores have release semantics, see 12.6.7 in Ecma 335 */
- mini_emit_memory_barrier (cfg, MONO_MEMORY_BARRIER_REL);
- }
- call = mono_emit_method_call (cfg, memset_method, iargs, NULL);
- call->flags |= ins_flag;
- }
- }
+ mini_emit_memory_copy_bytes (cfg, sp [0], sp [1], sp [2], ins_flag);
+ ip += 2;
+ ins_flag = 0;
+ inline_costs += 1;
+ break;
+ case CEE_INITBLK:
+ CHECK_STACK (3);
+ sp -= 3;
+ mini_emit_memory_init_bytes (cfg, sp [0], sp [1], sp [2], ins_flag);
ip += 2;
ins_flag = 0;
inline_costs += 1;
break;
- }
case CEE_NO_:
CHECK_OPSIZE (3);
if (ip [2] & 0x1)
load->klass = vtaddr->klass;
load->dreg = mono_alloc_ireg (cfg);
MONO_ADD_INS (cfg->cbb, load);
- mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 4);
+ mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, SIZEOF_VOID_P);
if (ainfo->pair_storage [0] == ArgInIReg) {
MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE);
MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, dreg);
} else if (size <= 40) {
- mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
+ mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, SIZEOF_VOID_P);
} else {
// FIXME: Code growth
- mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
+ mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, SIZEOF_VOID_P);
}
if (cfg->compute_gc_maps) {
MONO_ADD_INS (cfg->cbb, store);
}
} else {
- mini_emit_memcpy (cfg, IA64_SP, 16 + ainfo->offset, src->dreg, 0, size, 4);
+ mini_emit_memcpy (cfg, IA64_SP, 16 + ainfo->offset, src->dreg, 0, size, SIZEOF_VOID_P);
}
}
soffset += SIZEOF_REGISTER;
}
if (ovf_size != 0) {
- mini_emit_memcpy (cfg, mips_sp, doffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+ mini_emit_memcpy (cfg, mips_sp, doffset, src->dreg, soffset, ovf_size * sizeof (gpointer), SIZEOF_VOID_P);
}
} else if (ainfo->storage == ArgInFReg) {
int tmpr = mono_alloc_freg (cfg);
g_assert (ovf_size > 0);
EMIT_NEW_VARLOADA (cfg, load, vtcopy, vtcopy->inst_vtype);
- mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 0);
+ mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, SIZEOF_VOID_P);
if (ainfo->offset)
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, mips_at, ainfo->offset, load->dreg);
soffset += sizeof (gpointer);
}
if (ovf_size != 0)
- mini_emit_memcpy (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+ mini_emit_memcpy (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), SIZEOF_VOID_P);
} else if (ainfo->regtype == RegTypeFPStructByVal) {
soffset = 0;
for (i = 0; i < ainfo->vtregs; ++i) {
soffset += ainfo->size;
}
if (ovf_size != 0)
- mini_emit_memcpy (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), 0);
+ mini_emit_memcpy (cfg, ppc_r1, doffset + soffset, src->dreg, soffset, ovf_size * sizeof (gpointer), SIZEOF_VOID_P);
} else if (ainfo->regtype == RegTypeFP) {
int tmpr = mono_alloc_freg (cfg);
if (ainfo->size == 4)
g_assert (ovf_size > 0);
EMIT_NEW_VARLOADA (cfg, load, vtcopy, vtcopy->inst_vtype);
- mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 0);
+ mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, SIZEOF_VOID_P);
if (ainfo->offset)
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ppc_r1, ainfo->offset, load->dreg);
ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
int size = ins->backend.size;
- mini_emit_memcpy (cfg, sparc_sp, ainfo->offset, src->dreg, 0, size, 0);
+ mini_emit_memcpy (cfg, sparc_sp, ainfo->offset, src->dreg, 0, size, SIZEOF_VOID_P);
}
void
gboolean mini_emit_wb_aware_memcpy (MonoCompile *cfg, MonoClass *klass, MonoInst *iargs[4], int size, int align);
MonoInst* mini_emit_memory_load (MonoCompile *cfg, MonoType *type, MonoInst *src, int offset, int ins_flag);
void mini_emit_memory_store (MonoCompile *cfg, MonoType *type, MonoInst *dest, MonoInst *value, int ins_flag);
+void mini_emit_memory_copy_bytes (MonoCompile *cfg, MonoInst *dest, MonoInst *src, MonoInst *size, int ins_flag);
+void mini_emit_memory_init_bytes (MonoCompile *cfg, MonoInst *dest, MonoInst *value, MonoInst *size, int ins_flag);
MonoMethod* mini_get_memcpy_method (void);
MonoMethod* mini_get_memset_method (void);