#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
+#include <mono/utils/mono-mmap.h>
#include "trace.h"
#include "ir-emit.h"
#include "mini-amd64.h"
#include "cpu-amd64.h"
+#include "debugger-agent.h"
/*
* Can't define this in mini-amd64.h cause that would turn on the generic code in
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
-static gint thread_tls_offset = -1;
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif
+/* Size of the breakpoint sequence, an amd64_mov_reg_mem () of the trigger page */
+#define BREAKPOINT_SIZE 8
+
/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
+/*
+ * The code generated for sequence points reads from this location. The page
+ * is normally readable, and is made inaccessible when single stepping is
+ * enabled, so that the read faults with SIGSEGV.
+ */
+static gpointer ss_trigger_page;
+
+/* Enabled breakpoints read from this trigger page */
+static gpointer bp_trigger_page;
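+
+/*
+ * Illustrative sketch only (not part of this patch): the trigger-page
+ * mechanism in isolation, using raw POSIX calls instead of the mono-mmap
+ * wrappers. While the page is armed, any read of it raises SIGSEGV, which a
+ * signal handler can recognize by the faulting address.
+ */
+#if 0
+#include <sys/mman.h>
+
+static void
+trigger_page_demo (void)
+{
+	/* Map a page readable: loads from it are harmless */
+	void *page = mmap (NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	mprotect (page, 4096, PROT_NONE);  /* arm: reads now fault */
+	/* *(volatile int *)page;          // would raise SIGSEGV here */
+	mprotect (page, 4096, PROT_READ);  /* disarm: reads succeed again */
+
+	munmap (page, 4096);
+}
+#endif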
+
#ifdef PLATFORM_WIN32
/* On Win64 always reserve first 32 bytes for first four arguments */
#define ARGS_OFFSET 48
return "unknown";
}
-static const char * xmmregs [] = {
- "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8",
- "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
+static const char * packed_xmmregs [] = {
+ "p:xmm0", "p:xmm1", "p:xmm2", "p:xmm3", "p:xmm4", "p:xmm5", "p:xmm6", "p:xmm7", "p:xmm8",
+ "p:xmm9", "p:xmm10", "p:xmm11", "p:xmm12", "p:xmm13", "p:xmm14", "p:xmm15"
+};
+
+static const char * single_xmmregs [] = {
+ "s:xmm0", "s:xmm1", "s:xmm2", "s:xmm3", "s:xmm4", "s:xmm5", "s:xmm6", "s:xmm7", "s:xmm8",
+ "s:xmm9", "s:xmm10", "s:xmm11", "s:xmm12", "s:xmm13", "s:xmm14", "s:xmm15"
};
const char*
mono_arch_fregname (int reg)
{
if (reg < AMD64_XMM_NREG)
- return xmmregs [reg];
+ return single_xmmregs [reg];
+ else
+ return "unknown";
+}
+
+const char *
+mono_arch_xregname (int reg)
+{
+ if (reg < AMD64_XMM_NREG)
+ return packed_xmmregs [reg];
else
return "unknown";
}
guint32 reg_usage;
guint32 freg_usage;
gboolean need_stack_align;
+ gboolean vtype_retaddr;
ArgInfo ret;
ArgInfo sig_cookie;
ArgInfo args [1];
else
cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
+ cinfo->nargs = n;
+
gr = 0;
fr = 0;
guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
- if (cinfo->ret.storage == ArgOnStack)
+ if (cinfo->ret.storage == ArgOnStack) {
+ cinfo->vtype_retaddr = TRUE;
/* The caller passes the address where the value is stored */
add_general (&gr, &stack_size, &cinfo->ret);
+ }
break;
}
case MONO_TYPE_TYPEDBYREF:
mono_arch_init (void)
{
InitializeCriticalSection (&mini_arch_mutex);
+
+ ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+ bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT);
+ mono_mprotect (bp_trigger_page, mono_pagesize (), 0);
}
/*
return opts;
}
+/*
+ * This function tests which SSE versions the CPU supports.
+ *
+ * Returns a bitmask with one bit set for each supported SIMD_VERSION_*.
+ */
+guint32
+mono_arch_cpu_enumerate_simd_versions (void)
+{
+ int eax, ebx, ecx, edx;
+ guint32 sse_opts = 0;
+
+ if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
+ if (edx & (1 << 25))
+ sse_opts |= 1 << SIMD_VERSION_SSE1;
+ if (edx & (1 << 26))
+ sse_opts |= 1 << SIMD_VERSION_SSE2;
+ if (ecx & (1 << 0))
+ sse_opts |= 1 << SIMD_VERSION_SSE3;
+ if (ecx & (1 << 9))
+ sse_opts |= 1 << SIMD_VERSION_SSSE3;
+ if (ecx & (1 << 19))
+ sse_opts |= 1 << SIMD_VERSION_SSE41;
+ if (ecx & (1 << 20))
+ sse_opts |= 1 << SIMD_VERSION_SSE42;
+ }
+
+	/* Yes, all this needs to be done to check for sse4a.
+	   See: "AMD CPUID Specification".
+	 */
+ if (cpuid (0x80000000, &eax, &ebx, &ecx, &edx)) {
+		/* eax greater than or equal to 0x80000001, ebx = 'htuA', ecx = 'DMAc', edx = 'itne' */
+ if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
+ cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
+ if (ecx & (1 << 6))
+ sse_opts |= 1 << SIMD_VERSION_SSE4a;
+ }
+ }
+
+ return sse_opts;
+}
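+
+/*
+ * Illustrative sketch only: how a caller would consult the returned bitmask,
+ * e.g. to decide whether SSE2 instructions may be emitted.
+ */
+#if 0
+static gboolean
+simd_has_sse2 (void)
+{
+	guint32 versions = mono_arch_cpu_enumerate_simd_versions ();
+
+	return (versions & (1 << SIMD_VERSION_SSE2)) != 0;
+}
+#endif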
+
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
x86_branch32 (code, cond, 0, sign); \
}
+typedef struct {
+ MonoMethodSignature *sig;
+ CallInfo *cinfo;
+} ArchDynCallInfo;
+
+typedef struct {
+ mgreg_t regs [PARAM_REGS];
+ mgreg_t res;
+ guint8 *ret;
+} DynCallArgs;
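+
+/*
+ * Note: OP_DYN_CALL loads the argument registers from offsets
+ * 0 .. (PARAM_REGS - 1) * sizeof (gpointer) of this buffer, so the regs
+ * array must remain the first field of DynCallArgs.
+ */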
+
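+/*
+ * dyn_call_supported:
+ *
+ * Return whether OP_DYN_CALL can be used with SIG: only arguments and return
+ * values passed in integer registers are supported (no floating point
+ * registers and no stack slots), and Win64 is excluded entirely.
+ */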
+static gboolean
+dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo)
+{
+ int i;
+
+#ifdef PLATFORM_WIN32
+ return FALSE;
+#endif
+
+ switch (cinfo->ret.storage) {
+ case ArgNone:
+ case ArgInIReg:
+ break;
+ case ArgValuetypeInReg: {
+ ArgInfo *ainfo = &cinfo->ret;
+
+ if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
+ return FALSE;
+ if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
+ return FALSE;
+ break;
+ }
+ default:
+ return FALSE;
+ }
+
+ for (i = 0; i < cinfo->nargs; ++i) {
+ ArgInfo *ainfo = &cinfo->args [i];
+ switch (ainfo->storage) {
+ case ArgInIReg:
+ break;
+ case ArgValuetypeInReg:
+ if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
+ return FALSE;
+ if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
+ return FALSE;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+/*
+ * mono_arch_dyn_call_prepare:
+ *
+ * Return a pointer to an arch-specific structure which contains information
+ * needed by mono_arch_start_dyn_call (). Return NULL if OP_DYN_CALL is not
+ * supported for SIG.
+ * This function is equivalent to ffi_prep_cif in libffi.
+ */
+MonoDynCallInfo*
+mono_arch_dyn_call_prepare (MonoMethodSignature *sig)
+{
+ ArchDynCallInfo *info;
+ CallInfo *cinfo;
+
+ cinfo = get_call_info (NULL, NULL, sig, FALSE);
+
+ if (!dyn_call_supported (sig, cinfo)) {
+ g_free (cinfo);
+ return NULL;
+ }
+
+ info = g_new0 (ArchDynCallInfo, 1);
+ // FIXME: Preprocess the info to speed up get_dyn_call_args ().
+ info->sig = sig;
+ info->cinfo = cinfo;
+
+ return (MonoDynCallInfo*)info;
+}
+
+/*
+ * mono_arch_dyn_call_free:
+ *
+ * Free a MonoDynCallInfo structure.
+ */
+void
+mono_arch_dyn_call_free (MonoDynCallInfo *info)
+{
+ ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
+
+ g_free (ainfo->cinfo);
+ g_free (ainfo);
+}
+
+/*
+ * mono_arch_start_dyn_call:
+ *
+ * Convert the arguments ARGS to a format which can be passed to OP_DYN_CALL, and
+ * store the result into BUF.
+ * ARGS should be an array of pointers pointing to the arguments.
+ * RET should point to a memory buffer large enough to hold the result of the
+ * call.
+ * This function should be as fast as possible; any work which does not depend
+ * on the actual values of the arguments should be done in
+ * mono_arch_dyn_call_prepare ().
+ * start_dyn_call + OP_DYN_CALL + finish_dyn_call is equivalent to ffi_call in
+ * libffi.
+ */
+void
+mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len)
+{
+ ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
+ DynCallArgs *p = (DynCallArgs*)buf;
+ int arg_index, greg, i;
+ MonoMethodSignature *sig = dinfo->sig;
+
+ g_assert (buf_len >= sizeof (DynCallArgs));
+
+ p->res = 0;
+ p->ret = ret;
+
+ arg_index = 0;
+ greg = 0;
+
+ if (dinfo->cinfo->vtype_retaddr)
+ p->regs [greg ++] = (mgreg_t)ret;
+
+ if (sig->hasthis) {
+ p->regs [greg ++] = (mgreg_t)*(args [arg_index ++]);
+ }
+
+ for (i = 0; i < sig->param_count; i++) {
+ MonoType *t = mono_type_get_underlying_type (sig->params [i]);
+ gpointer *arg = args [arg_index ++];
+
+ if (t->byref) {
+ p->regs [greg ++] = (mgreg_t)*(arg);
+ continue;
+ }
+
+ switch (t->type) {
+ case MONO_TYPE_STRING:
+ case MONO_TYPE_CLASS:
+ case MONO_TYPE_ARRAY:
+ case MONO_TYPE_SZARRAY:
+ case MONO_TYPE_OBJECT:
+ case MONO_TYPE_PTR:
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8:
+ g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]);
+ p->regs [greg ++] = (mgreg_t)*(arg);
+ break;
+ case MONO_TYPE_BOOLEAN:
+ case MONO_TYPE_U1:
+ p->regs [greg ++] = *(guint8*)(arg);
+ break;
+ case MONO_TYPE_I1:
+ p->regs [greg ++] = *(gint8*)(arg);
+ break;
+ case MONO_TYPE_I2:
+ p->regs [greg ++] = *(gint16*)(arg);
+ break;
+ case MONO_TYPE_U2:
+ case MONO_TYPE_CHAR:
+ p->regs [greg ++] = *(guint16*)(arg);
+ break;
+ case MONO_TYPE_I4:
+ p->regs [greg ++] = *(gint32*)(arg);
+ break;
+ case MONO_TYPE_U4:
+ p->regs [greg ++] = *(guint32*)(arg);
+ break;
+ case MONO_TYPE_GENERICINST:
+ if (MONO_TYPE_IS_REFERENCE (t)) {
+ p->regs [greg ++] = (mgreg_t)*(arg);
+ break;
+ } else {
+ /* Fall through */
+ }
+ case MONO_TYPE_VALUETYPE: {
+ ArgInfo *ainfo = &dinfo->cinfo->args [i + sig->hasthis];
+
+ g_assert (ainfo->storage == ArgValuetypeInReg);
+ if (ainfo->pair_storage [0] != ArgNone) {
+ g_assert (ainfo->pair_storage [0] == ArgInIReg);
+ p->regs [greg ++] = ((mgreg_t*)(arg))[0];
+ }
+ if (ainfo->pair_storage [1] != ArgNone) {
+ g_assert (ainfo->pair_storage [1] == ArgInIReg);
+ p->regs [greg ++] = ((mgreg_t*)(arg))[1];
+ }
+ break;
+ }
+ default:
+ g_assert_not_reached ();
+ }
+ }
+
+ g_assert (greg <= PARAM_REGS);
+}
+
+/*
+ * mono_arch_finish_dyn_call:
+ *
+ * Store the result of a dyn call into the return value buffer passed to
+ * start_dyn_call ().
+ * This function should be as fast as possible; any work which does not depend
+ * on the actual values of the arguments should be done in
+ * mono_arch_dyn_call_prepare ().
+ */
+void
+mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
+{
+ ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
+ MonoMethodSignature *sig = dinfo->sig;
+ guint8 *ret = ((DynCallArgs*)buf)->ret;
+ mgreg_t res = ((DynCallArgs*)buf)->res;
+
+ switch (mono_type_get_underlying_type (sig->ret)->type) {
+ case MONO_TYPE_VOID:
+ *(gpointer*)ret = NULL;
+ break;
+ case MONO_TYPE_STRING:
+ case MONO_TYPE_CLASS:
+ case MONO_TYPE_ARRAY:
+ case MONO_TYPE_SZARRAY:
+ case MONO_TYPE_OBJECT:
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+ case MONO_TYPE_PTR:
+ *(gpointer*)ret = (gpointer)res;
+ break;
+ case MONO_TYPE_I1:
+ *(gint8*)ret = res;
+ break;
+ case MONO_TYPE_U1:
+ case MONO_TYPE_BOOLEAN:
+ *(guint8*)ret = res;
+ break;
+ case MONO_TYPE_I2:
+ *(gint16*)ret = res;
+ break;
+ case MONO_TYPE_U2:
+ case MONO_TYPE_CHAR:
+ *(guint16*)ret = res;
+ break;
+ case MONO_TYPE_I4:
+ *(gint32*)ret = res;
+ break;
+ case MONO_TYPE_U4:
+ *(guint32*)ret = res;
+ break;
+ case MONO_TYPE_I8:
+ *(gint64*)ret = res;
+ break;
+ case MONO_TYPE_U8:
+ *(guint64*)ret = res;
+ break;
+ case MONO_TYPE_GENERICINST:
+ if (MONO_TYPE_IS_REFERENCE (sig->ret)) {
+ *(gpointer*)ret = (gpointer)res;
+ break;
+ } else {
+ /* Fall through */
+ }
+ case MONO_TYPE_VALUETYPE:
+ if (dinfo->cinfo->vtype_retaddr) {
+ /* Nothing to do */
+ } else {
+ ArgInfo *ainfo = &dinfo->cinfo->ret;
+
+ g_assert (ainfo->storage == ArgValuetypeInReg);
+
+ if (ainfo->pair_storage [0] != ArgNone) {
+ g_assert (ainfo->pair_storage [0] == ArgInIReg);
+ ((mgreg_t*)ret)[0] = res;
+ }
+
+ g_assert (ainfo->pair_storage [1] == ArgNone);
+ }
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+}
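+
+/*
+ * Illustrative sketch only: the intended calling sequence for the functions
+ * above. The wrapper dyn_call_fn, whose JITted body contains OP_DYN_CALL,
+ * and the ADDR/ARGS parameters are assumptions made for this example; the
+ * real caller lives in the generic runtime code, not in this file.
+ */
+#if 0
+static void
+dyn_call_example (MonoMethodSignature *sig, gpointer **args, gpointer addr,
+		  void (*dyn_call_fn) (gpointer buf, gpointer addr))
+{
+	MonoDynCallInfo *info;
+	DynCallArgs buf;
+	guint8 ret [16];
+
+	/* Once per signature: fails for signatures OP_DYN_CALL cannot handle */
+	info = mono_arch_dyn_call_prepare (sig);
+	if (!info)
+		return;
+
+	/* Per call: marshal the arguments into the register buffer, ... */
+	mono_arch_start_dyn_call (info, args, ret, (guint8*)&buf, sizeof (buf));
+	/* ... execute the JITted OP_DYN_CALL wrapper, ... */
+	dyn_call_fn (&buf, addr);
+	/* ... then copy the raw result into the RET buffer */
+	mono_arch_finish_dyn_call (info, (guint8*)&buf);
+
+	mono_arch_dyn_call_free (info);
+}
+#endif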
+
/* emit an exception if condition is fail */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
do { \
ins->sreg1 = temp->dreg;
}
break;
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+ case OP_EXPAND_I1: {
+ int temp_reg1 = mono_alloc_ireg (cfg);
+ int temp_reg2 = mono_alloc_ireg (cfg);
+ int original_reg = ins->sreg1;
+
+ NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
+ temp->sreg1 = original_reg;
+ temp->dreg = temp_reg1;
+
+ NEW_INS (cfg, ins, temp, OP_SHL_IMM);
+ temp->sreg1 = temp_reg1;
+ temp->dreg = temp_reg2;
+ temp->inst_imm = 8;
+
+ NEW_INS (cfg, ins, temp, OP_LOR);
+ temp->sreg1 = temp->dreg = temp_reg2;
+ temp->sreg2 = temp_reg1;
+
+ ins->opcode = OP_EXPAND_I2;
+ ins->sreg1 = temp_reg2;
+ }
+ break;
+#endif
default:
break;
}
case OP_NOT_REACHED:
case OP_NOT_NULL:
break;
+ case OP_SEQ_POINT: {
+ int i, il_offset;
+
+ /*
+ * Read from the single stepping trigger page. This will cause a
+ * SIGSEGV when single stepping is enabled.
+ * We do this _before_ the breakpoint, so single stepping after
+ * a breakpoint is hit will step to the next IL offset.
+ */
+ g_assert (((guint64)ss_trigger_page >> 32) == 0);
+
+ if (ins->flags & MONO_INST_SINGLE_STEP_LOC)
+ amd64_mov_reg_mem (code, AMD64_R11, (guint64)ss_trigger_page, 4);
+
+ il_offset = ins->inst_imm;
+
+ if (!cfg->seq_points)
+ cfg->seq_points = g_ptr_array_new ();
+ g_ptr_array_add (cfg->seq_points, GUINT_TO_POINTER (il_offset));
+ g_ptr_array_add (cfg->seq_points, GUINT_TO_POINTER (code - cfg->native_code));
+ /*
+ * A placeholder for a possible breakpoint inserted by
+ * mono_arch_set_breakpoint ().
+ */
+ for (i = 0; i < BREAKPOINT_SIZE; ++i)
+ x86_nop (code);
+ break;
+ }
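+	/*
+	 * So, when MONO_INST_SINGLE_STEP_LOC is set, a sequence point assembles
+	 * to roughly:
+	 *
+	 *   mov r11d, [ss_trigger_page]   ; faults while single stepping is active
+	 *   nop * BREAKPOINT_SIZE         ; patched by mono_arch_set_breakpoint ()
+	 *
+	 * with the (il_offset, native offset) pair recorded in cfg->seq_points.
+	 */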
case OP_ADDCC:
case OP_LADD:
amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
code = emit_move_return_value (cfg, ins, code);
break;
+ case OP_DYN_CALL: {
+ int i;
+ MonoInst *var = cfg->dyn_call_var;
+
+ g_assert (var->opcode == OP_REGOFFSET);
+
+		/* r11 = args buffer filled by mono_arch_start_dyn_call () */
+ amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
+ /* r10 = ftn */
+ amd64_mov_reg_reg (code, AMD64_R10, ins->sreg2, 8);
+
+ /* Save args buffer */
+ amd64_mov_membase_reg (code, var->inst_basereg, var->inst_offset, AMD64_R11, 8);
+
+ /* Set argument registers */
+ for (i = 0; i < PARAM_REGS; ++i)
+ amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, i * sizeof (gpointer), 8);
+
+ /* Make the call */
+ amd64_call_reg (code, AMD64_R10);
+
+ /* Save result */
+ amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
+ amd64_mov_membase_reg (code, AMD64_R11, G_STRUCT_OFFSET (DynCallArgs, res), AMD64_RAX, 8);
+ break;
+ }
case OP_AMD64_SAVE_SP_TO_LMF:
amd64_mov_membase_reg (code, cfg->frame_reg, cfg->arch.lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
break;
case OP_STORER8_MEMBASE_REG:
amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
break;
- case OP_LOADR8_SPILL_MEMBASE:
- g_assert_not_reached ();
- break;
case OP_LOADR8_MEMBASE:
amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
break;
amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
break;
}
- case OP_LIVERANGE_START: {
- if (cfg->verbose_level > 1)
- printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
- MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+	/* TODO: Some of these IR opcodes are marked as no-clobber when they in fact clobber their source register. */
+ case OP_ADDPS:
+ amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
break;
- }
- case OP_LIVERANGE_END: {
- if (cfg->verbose_level > 1)
- printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
- MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
+ case OP_DIVPS:
+ amd64_sse_divps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MULPS:
+ amd64_sse_mulps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SUBPS:
+ amd64_sse_subps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MAXPS:
+ amd64_sse_maxps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MINPS:
+ amd64_sse_minps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_COMPPS:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ amd64_sse_cmpps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPS:
+ amd64_sse_andps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPS:
+ amd64_sse_andnps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPS:
+ amd64_sse_orps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPS:
+ amd64_sse_xorps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SQRTPS:
+ amd64_sse_sqrtps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_RSQRTPS:
+ amd64_sse_rsqrtps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_RCPPS:
+ amd64_sse_rcpps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_ADDSUBPS:
+ amd64_sse_addsubps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HADDPS:
+ amd64_sse_haddps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HSUBPS:
+ amd64_sse_hsubps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DUPPS_HIGH:
+ amd64_sse_movshdup_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_DUPPS_LOW:
+ amd64_sse_movsldup_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_PSHUFLEW_HIGH:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ amd64_sse_pshufhw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_PSHUFLEW_LOW:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ amd64_sse_pshuflw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_PSHUFLED:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+
+ case OP_ADDPD:
+ amd64_sse_addpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DIVPD:
+ amd64_sse_divpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MULPD:
+ amd64_sse_mulpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SUBPD:
+ amd64_sse_subpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MAXPD:
+ amd64_sse_maxpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MINPD:
+ amd64_sse_minpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_COMPPD:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ amd64_sse_cmppd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPD:
+ amd64_sse_andpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPD:
+ amd64_sse_andnpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPD:
+ amd64_sse_orpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPD:
+ amd64_sse_xorpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SQRTPD:
+ amd64_sse_sqrtpd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_ADDSUBPD:
+ amd64_sse_addsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HADDPD:
+ amd64_sse_haddpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HSUBPD:
+ amd64_sse_hsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DUPPD:
+ amd64_sse_movddup_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_EXTRACT_MASK:
+ amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_PAND:
+ amd64_sse_pand_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_POR:
+ amd64_sse_por_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PXOR:
+ amd64_sse_pxor_reg_reg (code, ins->sreg1, ins->sreg2);
break;
- }
- default:
- g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
- g_assert_not_reached ();
- }
-
- if ((code - cfg->native_code - offset) > max_len) {
- g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
- mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
- g_assert_not_reached ();
- }
-
- last_ins = ins;
- last_offset = offset;
- }
-
- cfg->code_len = code - cfg->native_code;
-}
+
+ case OP_PADDB:
+ amd64_sse_paddb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW:
+ amd64_sse_paddw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDD:
+ amd64_sse_paddd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDQ:
+ amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUBB:
+ amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW:
+ amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBD:
+ amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBQ:
+ amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB_UN:
+ amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW_UN:
+ amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD_UN:
+ amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB:
+ amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW:
+ amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD:
+ amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PAVGB_UN:
+ amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PAVGW_UN:
+ amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB_UN:
+ amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW_UN:
+ amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND_UN:
+ amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB:
+ amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW:
+ amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND:
+ amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPEQB:
+ amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQW:
+ amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQD:
+ amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQQ:
+ amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPGTB:
+ amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTW:
+ amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTD:
+ amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTQ:
+ amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUM_ABS_DIFF:
+ amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_UNPACK_LOWB:
+ amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWW:
+ amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWD:
+ amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWQ:
+ amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPS:
+ amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPD:
+ amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_UNPACK_HIGHB:
+ amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHW:
+ amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHD:
+ amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHQ:
+ amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPS:
+ amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPD:
+ amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PACKW:
+ amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD:
+ amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKW_UN:
+ amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD_UN:
+ amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT_UN:
+ amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT_UN:
+ amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT_UN:
+ amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT_UN:
+ amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT:
+ amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT:
+ amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT:
+ amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT:
+ amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMULW:
+ amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULD:
+ amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULQ:
+ amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH_UN:
+ amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH:
+ amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSHRW:
+ amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRW_REG:
+ amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARW:
+ amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARW_REG:
+ amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLW:
+ amd64_sse_psllw_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLW_REG:
+ amd64_sse_psllw_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHRD:
+ amd64_sse_psrld_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRD_REG:
+ amd64_sse_psrld_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARD:
+ amd64_sse_psrad_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARD_REG:
+ amd64_sse_psrad_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLD:
+ amd64_sse_pslld_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLD_REG:
+ amd64_sse_pslld_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHRQ:
+ amd64_sse_psrlq_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRQ_REG:
+ amd64_sse_psrlq_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+	/* TODO: This is part of the SSE spec but not added yet:
+ case OP_PSARQ:
+ amd64_sse_psraq_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARQ_REG:
+ amd64_sse_psraq_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+ */
+
+ case OP_PSHLQ:
+ amd64_sse_psllq_reg_imm (code, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLQ_REG:
+ amd64_sse_psllq_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_ICONV_TO_X:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ break;
+ case OP_EXTRACT_I4:
+ amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ break;
+ case OP_EXTRACT_I8:
+ if (ins->inst_c0) {
+ amd64_movhlps_reg_reg (code, AMD64_XMM15, ins->sreg1);
+ amd64_movd_reg_xreg_size (code, ins->dreg, AMD64_XMM15, 8);
+ } else {
+ amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
+ }
+ break;
+ case OP_EXTRACT_I1:
+ case OP_EXTRACT_U1:
+ amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ if (ins->inst_c0)
+ amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
+ amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
+ break;
+ case OP_EXTRACT_I2:
+ case OP_EXTRACT_U2:
+ /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ if (ins->inst_c0)
+ amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
+ amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4);
+ break;
+ case OP_EXTRACT_R8:
+ if (ins->inst_c0)
+ amd64_movhlps_reg_reg (code, ins->dreg, ins->sreg1);
+ else
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_INSERT_I2:
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_EXTRACTX_U2:
+ amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_INSERTX_U1_SLOW:
+		/* sreg1 is the extracted ireg (scratch)
+		 * sreg2 is the ireg to be inserted (scratch)
+		 * dreg is the xreg to receive the value */
+
+ /*clear the bits from the extracted word*/
+ amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
+ /*shift the value to insert if needed*/
+ if (ins->inst_c0 & 1)
+ amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4);
+ /*join them together*/
+ amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
+ break;
+ case OP_INSERTX_I4_SLOW:
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
+ amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
+ break;
+ case OP_INSERTX_I8_SLOW:
+ amd64_movd_xreg_reg_size(code, AMD64_XMM15, ins->sreg2, 8);
+ if (ins->inst_c0)
+ amd64_movlhps_reg_reg (code, ins->dreg, AMD64_XMM15);
+ else
+ amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM15);
+ break;
+
+ case OP_INSERTX_R4_SLOW:
+ switch (ins->inst_c0) {
+ case 0:
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+ case 1:
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
+ break;
+ case 2:
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
+ break;
+ case 3:
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
+ break;
+ }
+ break;
+ case OP_INSERTX_R8_SLOW:
+ if (ins->inst_c0)
+ amd64_movlhps_reg_reg (code, ins->dreg, ins->sreg2);
+ else
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg2);
+ break;
+ case OP_STOREX_MEMBASE_REG:
+ case OP_STOREX_MEMBASE:
+ amd64_sse_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
+ break;
+ case OP_LOADX_MEMBASE:
+ amd64_sse_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_LOADX_ALIGNED_MEMBASE:
+ amd64_sse_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_STOREX_ALIGNED_MEMBASE_REG:
+ amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
+ break;
+ case OP_STOREX_NTA_MEMBASE_REG:
+ amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_PREFETCH_MEMBASE:
+ amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
+ break;
+
+ case OP_XMOVE:
+ /*FIXME the peephole pass should have killed this*/
+ if (ins->dreg != ins->sreg1)
+ amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_XZERO:
+ amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
+ break;
+ case OP_ICONV_TO_R8_RAW:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
+ break;
+
+ case OP_FCONV_TO_R8_X:
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_XCONV_R8_TO_I4:
+ amd64_sse_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
+ switch (ins->backend.source_opcode) {
+ case OP_FCONV_TO_I1:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
+ break;
+ case OP_FCONV_TO_U1:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
+ break;
+ case OP_FCONV_TO_I2:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
+ break;
+ case OP_FCONV_TO_U2:
+ amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
+ break;
+ }
+ break;
+
+ case OP_EXPAND_I2:
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 0);
+ amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 1);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I4:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I8:
+ amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
+ break;
+ case OP_EXPAND_R4:
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->dreg);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_R8:
+ amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
+ amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
+ break;
+#endif
+ case OP_LIVERANGE_START: {
+ if (cfg->verbose_level > 1)
+ printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
+ MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
+ break;
+ }
+ case OP_LIVERANGE_END: {
+ if (cfg->verbose_level > 1)
+ printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
+ MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
+ break;
+ }
+ default:
+ g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
+ g_assert_not_reached ();
+ }
+
+ if ((code - cfg->native_code - offset) > max_len) {
+ g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
+ mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
+ g_assert_not_reached ();
+ }
+
+ last_ins = ins;
+ last_offset = offset;
+ }
+
+ cfg->code_len = code - cfg->native_code;
+}
#endif /* DISABLE_JIT */
guint8 *buf, *no_domain_branch;
code = mono_amd64_emit_tls_get (code, AMD64_RAX, appdomain_tls_offset);
- if ((domain >> 32) == 0)
- amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
- else
- amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
+ if (cfg->compile_aot) {
+ /* AOT code is only used in the root domain */
+ amd64_mov_reg_imm (code, AMD64_ARG_REG1, 0);
+ } else {
+ if ((domain >> 32) == 0)
+ amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
+ else
+ amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
+ }
amd64_alu_reg_reg (code, X86_CMP, AMD64_RAX, AMD64_ARG_REG1);
no_domain_branch = code;
x86_branch8 (code, X86_CC_NE, 0, 0);
#endif
} else {
g_assert (!cfg->compile_aot);
- if ((domain >> 32) == 0)
- amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
- else
- amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
+ if (cfg->compile_aot) {
+ /* AOT code is only used in the root domain */
+ amd64_mov_reg_imm (code, AMD64_ARG_REG1, 0);
+ } else {
+ if ((domain >> 32) == 0)
+ amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
+ else
+ amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
+ }
code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
(gpointer)"mono_jit_thread_attach", TRUE);
}
if (method->save_lmf) {
if ((lmf_tls_offset != -1) && !optimize_for_xen) {
/*
- * Optimized version which uses the mono_lmf TLS variable instead of indirection
- * through the mono_lmf_addr TLS variable.
+ * Optimized version which uses the mono_lmf TLS variable instead of
+ * indirection through the mono_lmf_addr TLS variable.
*/
/* %rax = previous_lmf */
x86_prefix (code, X86_FS_PREFIX);
}
gpointer
-mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
+mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
{
guint8 buf [10];
guint32 reg;
gint32 disp;
guint8 rex = 0;
+ MonoJitInfo *ji = NULL;
+
+#ifdef ENABLE_LLVM
+ /* code - 9 might be before the start of the method */
+ /* FIXME: Avoid this expensive call somehow */
+ ji = mono_jit_info_table_find (mono_domain_get (), (char*)code);
+#endif
- mono_breakpoint_clean_code (NULL, code, 9, buf, sizeof (buf));
+ mono_breakpoint_clean_code (ji ? ji->code_start : NULL, code, 9, buf, sizeof (buf));
code = buf + 9;
*displacement = 0;
g_assert (reg != AMD64_R11);
*displacement = disp;
- return regs [reg];
+ return (gpointer)regs [reg];
}
int
}
gpointer
-mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
+mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, mgreg_t *regs, guint8 *code)
{
return (gpointer)regs [mono_arch_get_this_arg_reg (sig, gsctx, code)];
}
*/
appdomain_tls_offset = mono_domain_get_tls_key ();
lmf_tls_offset = mono_get_jit_tls_key ();
- thread_tls_offset = mono_thread_get_tls_key ();
lmf_addr_tls_offset = mono_get_jit_tls_key ();
/* Only 64 tls entries can be accessed using inline code */
appdomain_tls_offset = -1;
if (lmf_tls_offset >= 64)
lmf_tls_offset = -1;
- if (thread_tls_offset >= 64)
- thread_tls_offset = -1;
#else
tls_offset_inited = TRUE;
#ifdef MONO_XEN_OPT
appdomain_tls_offset = mono_domain_get_tls_offset ();
lmf_tls_offset = mono_get_lmf_tls_offset ();
lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
- thread_tls_offset = mono_thread_get_tls_offset ();
#endif
}
}
}
MonoMethod*
-mono_arch_find_imt_method (gpointer *regs, guint8 *code)
+mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
{
- return regs [MONO_ARCH_IMT_REG];
+ return (MonoMethod*)regs [MONO_ARCH_IMT_REG];
}
MonoObject*
-mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
+mono_arch_find_this_argument (mgreg_t *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
{
- return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
+ return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), regs, NULL);
}
#endif
MonoVTable*
-mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
+mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
{
return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
}
return ins;
}
-MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
-{
- MonoInst* ins;
-
- if (thread_tls_offset == -1)
- return NULL;
-
- MONO_INST_NEW (cfg, ins, OP_TLS_GET);
- ins->inst_offset = thread_tls_offset;
- return ins;
-}
-
#define _CTX_REG(ctx,fld,i) ((gpointer)((&ctx->fld)[i]))
gpointer
g_assert_not_reached ();
}
}
+
+/*
+ * mono_arch_set_breakpoint:
+ *
+ * Set a breakpoint at IP in the native code of JI. The location should
+ * contain code emitted by OP_SEQ_POINT.
+ */
+void
+mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+ guint8 *code = ip;
+ guint8 *orig_code = code;
+
+	/*
+	 * In production we would use int3 (which requires fixing the instruction
+	 * size in the cpu-amd64.md file). But int3 can confuse gdb, so during
+	 * development we trigger a SIGSEGV instead, by patching in a read from
+	 * the breakpoint trigger page.
+	 */
+ g_assert (code [0] == 0x90);
+
+ g_assert (((guint64)bp_trigger_page >> 32) == 0);
+
+ amd64_mov_reg_mem (code, AMD64_R11, (guint64)bp_trigger_page, 4);
+ g_assert (code - orig_code == BREAKPOINT_SIZE);
+}
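+
+/*
+ * After patching, the BREAKPOINT_SIZE nops emitted for the sequence point
+ * become roughly:
+ *
+ *   mov r11d, [bp_trigger_page]   ; faults, since the page has no access
+ *
+ * mono_arch_clear_breakpoint () below restores the nops.
+ */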
+
+/*
+ * mono_arch_clear_breakpoint:
+ *
+ * Clear the breakpoint at IP.
+ */
+void
+mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+ guint8 *code = ip;
+ int i;
+
+ for (i = 0; i < BREAKPOINT_SIZE; ++i)
+ x86_nop (code);
+}
+
+/*
+ * mono_arch_start_single_stepping:
+ *
+ * Start single stepping.
+ */
+void
+mono_arch_start_single_stepping (void)
+{
+ mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
+}
+
+/*
+ * mono_arch_stop_single_stepping:
+ *
+ * Stop single stepping.
+ */
+void
+mono_arch_stop_single_stepping (void)
+{
+ mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
+}
+
+/*
+ * mono_arch_is_single_step_event:
+ *
+ * Return whether the machine state in SIGCTX corresponds to a single step
+ * event.
+ */
+gboolean
+mono_arch_is_single_step_event (siginfo_t *info, void *sigctx)
+{
+ /* Sometimes the address is off by 4 */
+ if (info->si_addr >= ss_trigger_page && (guint8*)info->si_addr <= (guint8*)ss_trigger_page + 128)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+gboolean
+mono_arch_is_breakpoint_event (siginfo_t *info, void *sigctx)
+{
+ /* Sometimes the address is off by 4 */
+ if (info->si_addr >= bp_trigger_page && (guint8*)info->si_addr <= (guint8*)bp_trigger_page + 128)
+ return TRUE;
+ else
+ return FALSE;
+}
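+
+/*
+ * Illustrative sketch only: how a SIGSEGV handler could route faults using
+ * the two predicates above. The real dispatch lives in the debugger agent,
+ * and the sigctx conversion helpers are assumed to be the usual
+ * mono_arch_sigctx_to_monoctx ()/mono_arch_monoctx_to_sigctx () pair.
+ */
+#if 0
+static void
+sigsegv_dispatch_example (siginfo_t *info, void *sigctx)
+{
+	MonoContext ctx;
+
+	mono_arch_sigctx_to_monoctx (sigctx, &ctx);
+
+	if (mono_arch_is_single_step_event (info, sigctx)) {
+		/* Advance past the trigger-page read, then report a step event */
+		mono_arch_skip_single_step (&ctx);
+	} else if (mono_arch_is_breakpoint_event (info, sigctx)) {
+		/* Advance past the breakpoint sequence, then report a breakpoint */
+		mono_arch_skip_breakpoint (&ctx);
+	}
+	/* Otherwise: a genuine crash, handled by the normal SIGSEGV path */
+
+	mono_arch_monoctx_to_sigctx (&ctx, sigctx);
+}
+#endif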
+
+/*
+ * mono_arch_get_ip_for_breakpoint:
+ *
+ * Convert the ip in CTX to the address where a breakpoint was placed.
+ */
+guint8*
+mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
+{
+ guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+	/* The trigger-page read faults at the start of the breakpoint sequence,
+	 * so the ip needs no adjustment */
+	ip -= 0;
+
+ return ip;
+}
+
+/*
+ * mono_arch_get_ip_for_single_step:
+ *
+ * Convert the ip in CTX to the address stored in seq_points.
+ */
+guint8*
+mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
+{
+ guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+ /* Size of amd64_mov_reg_mem (r11) */
+ ip += 8;
+
+ return ip;
+}
+
+/*
+ * mono_arch_skip_breakpoint:
+ *
+ * Modify CTX so the ip is placed after the breakpoint instruction, so when
+ * we resume, the instruction is not executed again.
+ */
+void
+mono_arch_skip_breakpoint (MonoContext *ctx)
+{
+ MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + BREAKPOINT_SIZE);
+}
+
+/*
+ * mono_arch_skip_single_step:
+ *
+ * Modify CTX so the ip is placed after the single step trigger instruction,
+ * so when we resume, the instruction is not executed again.
+ */
+void
+mono_arch_skip_single_step (MonoContext *ctx)
+{
+ MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 8);
+}
+
+/*
+ * mono_arch_get_seq_point_info:
+ *
+ * Return a pointer to a data structure which is used by the sequence
+ * point implementation in AOTed code.
+ */
+gpointer
+mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}