arm_ldrx_lit (code, ARMREG_IP0, code + 8);
arm_brx (code, ARMREG_IP0);
*(guint64*)code = (guint64)target;
+ code += sizeof (guint64);
mono_arch_flush_icache (p, code - p);
return code;
{
switch (relocation) {
case MONO_R_ARM64_B:
- arm_b (code, target);
+ if (arm_is_bl_disp (code, target)) {
+ arm_b (code, target);
+ } else {
+ gpointer thunk;
+
+ thunk = create_thunk (cfg, domain, code, target);
+ g_assert (arm_is_bl_disp (code, thunk));
+ arm_b (code, thunk);
+ }
break;
case MONO_R_ARM64_BCC: {
int cond;
thunk = create_thunk (cfg, domain, code, target);
g_assert (arm_is_bl_disp (code, thunk));
- arm_bl (code, thunk);
+ arm_bl (code, thunk);
}
break;
default:
#if __APPLE__
sys_icache_invalidate (code, size);
#else
- __clear_cache (code, code + size);
+ /* Don't rely on GCC's __clear_cache implementation, as it caches the
+ * icache/dcache line sizes, which can vary between cores on
+ * big.LITTLE architectures. */
+ guint64 end = (guint64) (code + size);
+ guint64 addr;
+ /* Always use a cache line size of 4 bytes, as this code isn't performance
+ * critical anyway. Reading the cache line size from a machine register can
+ * be racy on a big.LITTLE architecture if the cores don't have the same
+ * cache line sizes. */
+ const size_t icache_line_size = 4;
+ const size_t dcache_line_size = 4;
+
+ addr = (guint64) code & ~(guint64) (dcache_line_size - 1);
+ for (; addr < end; addr += dcache_line_size)
+ asm volatile("dc civac, %0" : : "r" (addr) : "memory");
+ asm volatile("dsb ish" : : : "memory");
+
+ addr = (guint64) code & ~(guint64) (icache_line_size - 1);
+ for (; addr < end; addr += icache_line_size)
+ asm volatile("ic ivau, %0" : : "r" (addr) : "memory");
+
+ asm volatile ("dsb ish" : : : "memory");
+ asm volatile ("isb" : : : "memory");
#endif
#endif
}
guint8 *buf [16];
buf [0] = code;
- arm_ldaxrw (code, ARMREG_IP0, sreg1);
+ arm_ldxrw (code, ARMREG_IP0, sreg1);
arm_addx (code, ARMREG_IP0, ARMREG_IP0, sreg2);
arm_stlxrw (code, ARMREG_IP1, ARMREG_IP0, sreg1);
arm_cbnzw (code, ARMREG_IP1, buf [0]);
+ arm_dmb (code, 0);
arm_movx (code, dreg, ARMREG_IP0);
break;
}
guint8 *buf [16];
buf [0] = code;
- arm_ldaxrx (code, ARMREG_IP0, sreg1);
+ arm_ldxrx (code, ARMREG_IP0, sreg1);
arm_addx (code, ARMREG_IP0, ARMREG_IP0, sreg2);
arm_stlxrx (code, ARMREG_IP1, ARMREG_IP0, sreg1);
arm_cbnzx (code, ARMREG_IP1, buf [0]);
+ arm_dmb (code, 0);
arm_movx (code, dreg, ARMREG_IP0);
break;
}
guint8 *buf [16];
buf [0] = code;
- arm_ldaxrw (code, ARMREG_IP0, sreg1);
+ arm_ldxrw (code, ARMREG_IP0, sreg1);
arm_stlxrw (code, ARMREG_IP1, sreg2, sreg1);
arm_cbnzw (code, ARMREG_IP1, buf [0]);
+ arm_dmb (code, 0);
arm_movx (code, dreg, ARMREG_IP0);
break;
}
guint8 *buf [16];
buf [0] = code;
- arm_ldaxrx (code, ARMREG_IP0, sreg1);
+ arm_ldxrx (code, ARMREG_IP0, sreg1);
arm_stlxrx (code, ARMREG_IP1, sreg2, sreg1);
arm_cbnzw (code, ARMREG_IP1, buf [0]);
+ arm_dmb (code, 0);
arm_movx (code, dreg, ARMREG_IP0);
break;
}
/* sreg2 is the value, sreg3 is the comparand */
buf [0] = code;
- arm_ldaxrw (code, ARMREG_IP0, sreg1);
+ arm_ldxrw (code, ARMREG_IP0, sreg1);
arm_cmpw (code, ARMREG_IP0, ins->sreg3);
buf [1] = code;
arm_bcc (code, ARMCOND_NE, 0);
arm_cbnzw (code, ARMREG_IP1, buf [0]);
arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC);
+ arm_dmb (code, 0);
arm_movx (code, dreg, ARMREG_IP0);
break;
}
guint8 *buf [16];
buf [0] = code;
- arm_ldaxrx (code, ARMREG_IP0, sreg1);
+ arm_ldxrx (code, ARMREG_IP0, sreg1);
arm_cmpx (code, ARMREG_IP0, ins->sreg3);
buf [1] = code;
arm_bcc (code, ARMCOND_NE, 0);
arm_cbnzw (code, ARMREG_IP1, buf [0]);
arm_patch_rel (buf [1], code, MONO_R_ARM64_BCC);
+ arm_dmb (code, 0);
arm_movx (code, dreg, ARMREG_IP0);
break;
}
case OP_ATOMIC_LOAD_I1: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarb (code, ins->dreg, ARMREG_LR);
arm_sxtbx (code, ins->dreg, ins->dreg);
break;
}
case OP_ATOMIC_LOAD_U1: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarb (code, ins->dreg, ARMREG_LR);
arm_uxtbx (code, ins->dreg, ins->dreg);
break;
}
case OP_ATOMIC_LOAD_I2: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarh (code, ins->dreg, ARMREG_LR);
arm_sxthx (code, ins->dreg, ins->dreg);
break;
}
case OP_ATOMIC_LOAD_U2: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarh (code, ins->dreg, ARMREG_LR);
arm_uxthx (code, ins->dreg, ins->dreg);
break;
}
case OP_ATOMIC_LOAD_I4: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarw (code, ins->dreg, ARMREG_LR);
arm_sxtwx (code, ins->dreg, ins->dreg);
break;
}
case OP_ATOMIC_LOAD_U4: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarw (code, ins->dreg, ARMREG_LR);
arm_movw (code, ins->dreg, ins->dreg); /* Clear upper half of the register. */
break;
case OP_ATOMIC_LOAD_I8:
case OP_ATOMIC_LOAD_U8: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarx (code, ins->dreg, ARMREG_LR);
break;
}
case OP_ATOMIC_LOAD_R4: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
if (cfg->r4fp) {
arm_ldarw (code, ARMREG_LR, ARMREG_LR);
arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR);
}
case OP_ATOMIC_LOAD_R8: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_basereg, ins->inst_offset);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
arm_ldarx (code, ARMREG_LR, ARMREG_LR);
arm_fmov_rx_to_double (code, ins->dreg, ARMREG_LR);
break;
case OP_ATOMIC_STORE_U1: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
arm_stlrb (code, ARMREG_LR, ins->sreg1);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
break;
}
case OP_ATOMIC_STORE_I2:
case OP_ATOMIC_STORE_U2: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
arm_stlrh (code, ARMREG_LR, ins->sreg1);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
break;
}
case OP_ATOMIC_STORE_I4:
case OP_ATOMIC_STORE_U4: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
arm_stlrw (code, ARMREG_LR, ins->sreg1);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
break;
}
case OP_ATOMIC_STORE_I8:
case OP_ATOMIC_STORE_U8: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
arm_stlrx (code, ARMREG_LR, ins->sreg1);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
break;
}
case OP_ATOMIC_STORE_R4: {
arm_fmov_double_to_rx (code, ARMREG_IP0, FP_TEMP_REG);
arm_stlrw (code, ARMREG_LR, ARMREG_IP0);
}
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
break;
}
case OP_ATOMIC_STORE_R8: {
code = emit_addx_imm (code, ARMREG_LR, ins->inst_destbasereg, ins->inst_offset);
arm_fmov_double_to_rx (code, ARMREG_IP0, ins->sreg1);
arm_stlrx (code, ARMREG_LR, ARMREG_IP0);
+ if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
+ arm_dmb (code, 0);
break;
}
}
gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
- gpointer fail_tramp)
+mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+ gpointer fail_tramp)
{
int i, buf_len, imt_reg;
guint8 *buf, *code;
#if DEBUG_IMT
- printf ("building IMT thunk for class %s %s entries %d code size %d code at %p end %p vtable %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable);
+ printf ("building IMT trampoline for class %s %s entries %d code size %d code at %p end %p vtable %p\n", vtable->klass->name_space, vtable->klass->name, count, size, start, ((guint8*)start) + size, vtable);
for (i = 0; i < count; ++i) {
MonoIMTCheckItem *item = imt_entries [i];
printf ("method %d (%p) %s vtable slot %p is_equals %d chunk size %d\n", i, item->key, item->key->name, &vtable->vtable [item->value.vtable_slot], item->is_equals, item->chunk_size);
}
if (fail_tramp)
- buf = mono_method_alloc_generic_virtual_thunk (domain, buf_len);
+ buf = mono_method_alloc_generic_virtual_trampoline (domain, buf_len);
else
buf = mono_domain_code_reserve (domain, buf_len);
code = buf;
#else /* DISABLE_JIT */
gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
- gpointer fail_tramp)
+mono_arch_build_imt_trampoline (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+ gpointer fail_tramp)
{
g_assert_not_reached ();
return NULL;