#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
+#include <mono/utils/mono-counters.h>
#include "trace.h"
#include "mini-x86.h"
-#include "inssel.h"
#include "cpu-x86.h"
+#include "ir-emit.h"
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
}
}
+const char *
+mono_arch_xregname (int reg)
+{
+ switch (reg) {
+ case 0:
+ return "%xmm0";
+ case 1:
+ return "%xmm1";
+ case 2:
+ return "%xmm2";
+ case 3:
+ return "%xmm3";
+ case 4:
+ return "%xmm4";
+ case 5:
+ return "%xmm5";
+ case 6:
+ return "%xmm6";
+ case 7:
+ return "%xmm7";
+ default:
+ return "unknown";
+ }
+}
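+
+/*
+ * Usage sketch (illustrative, not part of the patch): the register-name
+ * helpers feed instruction pretty-printing, e.g.
+ *
+ *   printf ("xmove %s <- %s\n", mono_arch_xregname (ins->dreg),
+ *           mono_arch_xregname (ins->sreg1));
+ *
+ * Register numbers outside 0-7 print as "unknown".
+ */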
+
+
typedef enum {
ArgInIReg,
ArgInFloatSSEReg,
add_general (&gr, &stack_size, &cinfo->sig_cookie);
}
-#if defined(__APPLE__)
- if ((stack_size % 16) != 0) {
+ if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
cinfo->need_stack_align = TRUE;
- stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
+ cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
+ stack_size += cinfo->stack_align_amount;
}
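+ /*
+ * Worked example (illustrative): with MONO_ARCH_FRAME_ALIGNMENT == 16 and
+ * stack_size == 20, stack_align_amount = 16 - (20 % 16) = 12, so the
+ * padded stack_size becomes 32, a multiple of the frame alignment.
+ */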
-#endif
cinfo->stack_usage = stack_size;
cinfo->reg_usage = gr;
offset += size;
}
- align = 4;
+ if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
+ align = MONO_ARCH_FRAME_ALIGNMENT;
+ else
+ align = 4;
args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
arg_info [k].pad = pad;
opts |= MONO_OPT_SSE2;
else
*exclude_mask |= MONO_OPT_SSE2;
+
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+ /*SIMD intrinsics require at least SSE2.*/
+ if (!(opts & MONO_OPT_SSE2))
+ *exclude_mask |= MONO_OPT_SIMD;
+#endif
}
return opts;
}
+/*
+ * This function tests for all supported SSE versions.
+ *
+ * Returns a bitmask corresponding to all supported versions.
+ *
+ * TODO detect other versions like SSE4a.
+ */
+guint32
+mono_arch_cpu_enumerate_simd_versions (void)
+{
+ int eax, ebx, ecx, edx;
+ guint32 sse_opts = 0;
+
+ if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
+ if (edx & (1 << 25))
+ sse_opts |= 1 << SIMD_VERSION_SSE1;
+ if (edx & (1 << 26))
+ sse_opts |= 1 << SIMD_VERSION_SSE2;
+ if (ecx & (1 << 0))
+ sse_opts |= 1 << SIMD_VERSION_SSE3;
+ if (ecx & (1 << 9))
+ sse_opts |= 1 << SIMD_VERSION_SSSE3;
+ if (ecx & (1 << 19))
+ sse_opts |= 1 << SIMD_VERSION_SSE41;
+ if (ecx & (1 << 20))
+ sse_opts |= 1 << SIMD_VERSION_SSE42;
+ }
+ return sse_opts;
+}
+
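+/*
+ * A caller can test the returned bitmask per version; a minimal sketch
+ * (use_sse41_path/use_sse2_path are hypothetical helpers):
+ *
+ *   guint32 simd = mono_arch_cpu_enumerate_simd_versions ();
+ *   if (simd & (1 << SIMD_VERSION_SSE41))
+ *           use_sse41_path ();
+ *   else if (simd & (1 << SIMD_VERSION_SSE2))
+ *           use_sse2_path ();
+ */
+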
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
* integer overflow.
cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
- cfg->frame_reg = MONO_ARCH_BASEREG;
+ cfg->frame_reg = X86_EBP;
offset = 0;
/* Reserve space to save LMF and caller saved registers */
offset += (locals_stack_align - 1);
offset &= ~(locals_stack_align - 1);
}
+ /*
+ * EBP sits at offset 8 modulo MONO_ARCH_FRAME_ALIGNMENT (return ip +
+ * saved EBP), so if we have locals larger than 8 bytes we need to
+ * make sure that they get the appropriate offset.
+ */
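+ /*
+ * Example (illustrative): with a 16-byte frame alignment and 4-byte
+ * pointers the adjustment below adds 16 - 2*4 = 8 bytes, so 16-byte
+ * locals end up correctly aligned relative to the frame.
+ */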
+ if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
+ offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
if (offsets [i] != -1) {
MonoInst *inst = cfg->varinfo [i];
}
}
-static void
-emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
-{
- MonoInst *arg;
- MonoMethodSignature *tmp_sig;
- MonoInst *sig_arg;
-
- /* FIXME: Add support for signature tokens to AOT */
- cfg->disable_aot = TRUE;
- MONO_INST_NEW (cfg, arg, OP_OUTARG);
-
- /*
- * mono_ArgIterator_Setup assumes the signature cookie is
- * passed first and all the arguments which were before it are
- * passed on the stack after the signature. So compensate by
- * passing a different signature.
- */
- tmp_sig = mono_metadata_signature_dup (call->signature);
- tmp_sig->param_count -= call->signature->sentinelpos;
- tmp_sig->sentinelpos = 0;
- memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
-
- MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
- sig_arg->inst_p0 = tmp_sig;
-
- arg->inst_left = sig_arg;
- arg->type = STACK_PTR;
- /* prepend, so they get reversed */
- arg->next = call->out_args;
- call->out_args = arg;
-}
-
/*
* It is expensive to adjust esp for each individual fp argument pushed on the stack
* so we try to do it just once when we have multiple fp arguments in a row.
* fp_arg_setup is the first argument in the execution sequence where the esp register
* is modified.
*/
-static int
+static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
int fp_space = 0;
return fp_space;
}
-/*
- * take the arguments and generate the arch-specific
- * instructions to properly call the function in call.
- * This includes pushing, moving arguments to the right register
- * etc.
- */
-MonoCallInst*
-mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
- MonoInst *arg, *in;
- MonoMethodSignature *sig;
- int i, n;
- CallInfo *cinfo;
- int sentinelpos = 0;
- int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;
-
- sig = call->signature;
- n = sig->param_count + sig->hasthis;
-
- cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
-
- if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
- sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
-
- for (i = 0; i < n; ++i) {
- ArgInfo *ainfo = cinfo->args + i;
-
- /* Emit the signature cookie just before the implicit arguments */
- if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
- emit_sig_cookie (cfg, call);
- }
-
- if (is_virtual && i == 0) {
- /* the argument will be attached to the call instrucion */
- in = call->args [i];
- } else {
- MonoType *t;
-
- if (i >= sig->hasthis)
- t = sig->params [i - sig->hasthis];
- else
- t = &mono_defaults.int_class->byval_arg;
- t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);
-
- MONO_INST_NEW (cfg, arg, OP_OUTARG);
- in = call->args [i];
- arg->cil_code = in->cil_code;
- arg->inst_left = in;
- arg->type = in->type;
- /* prepend, so they get reversed */
- arg->next = call->out_args;
- call->out_args = arg;
-
- if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
- gint align;
- guint32 ialign;
- guint32 size;
-
- if (t->type == MONO_TYPE_TYPEDBYREF) {
- size = sizeof (MonoTypedRef);
- align = sizeof (gpointer);
- }
- else {
- size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &ialign, sig->pinvoke);
- }
- arg->opcode = OP_OUTARG_VT;
- arg->klass = in->klass;
- arg->backend.is_pinvoke = sig->pinvoke;
- arg->inst_imm = size;
- }
- else {
- switch (ainfo->storage) {
- case ArgOnStack:
- arg->opcode = OP_OUTARG;
- if (!t->byref) {
- if (t->type == MONO_TYPE_R4) {
- arg->opcode = OP_OUTARG_R4;
- } else if (t->type == MONO_TYPE_R8) {
- arg->opcode = OP_OUTARG_R8;
- /* we store in the upper bits of backen.arg_info the needed
- * esp adjustment and in the lower bits the offset from esp
- * where the arg needs to be stored
- */
- if (!fp_args_space) {
- fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
- fp_args_offset = fp_args_space;
- }
- arg->backend.arg_info = fp_args_space - fp_args_offset;
- fp_args_offset -= sizeof (double);
- if (i - sig->hasthis == fp_arg_setup) {
- arg->backend.arg_info |= fp_args_space << 16;
- }
- if (fp_args_offset == 0) {
- /* the allocated esp stack is finished:
- * prepare for an eventual second run of fp args
- */
- fp_args_space = 0;
- }
- }
- }
- break;
- default:
- g_assert_not_reached ();
- }
- }
- }
- }
-
- /* Handle the case where there are no implicit arguments */
- if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
- emit_sig_cookie (cfg, call);
- }
-
- if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
- if (cinfo->ret.storage == ArgValuetypeInReg) {
- MonoInst *zero_inst;
- /*
- * After the call, the struct is in registers, but needs to be saved to the memory pointed
- * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
- * before calling the function. So we add a dummy instruction to represent pushing the
- * struct return address to the stack. The return address will be saved to this stack slot
- * by the code emitted in this_vret_args.
- */
- MONO_INST_NEW (cfg, arg, OP_OUTARG);
- MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
- zero_inst->inst_p0 = 0;
- arg->inst_left = zero_inst;
- arg->type = STACK_PTR;
- /* prepend, so they get reversed */
- arg->next = call->out_args;
- call->out_args = arg;
- }
- else
- /* if the function returns a struct, the called method already does a ret $0x4 */
- if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
- cinfo->stack_usage -= 4;
- }
-
- call->stack_usage = cinfo->stack_usage;
-
-#if defined(__APPLE__)
- if (cinfo->need_stack_align) {
- MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
- arg->inst_c0 = cinfo->stack_align_amount;
- arg->next = call->out_args;
- call->out_args = arg;
- }
-#endif
-
- return call;
-}
-
static void
-emit_sig_cookie2 (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
+emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
MonoMethodSignature *tmp_sig;
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);
-#if defined(__APPLE__)
if (cinfo->need_stack_align) {
MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
arg->dreg = X86_ESP;
arg->inst_imm = cinfo->stack_align_amount;
MONO_ADD_INS (cfg->cbb, arg);
}
-#endif
if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
MonoInst *vtarg;
/* Handle the case where there are no implicit arguments */
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
- emit_sig_cookie2 (cfg, call, cinfo);
+ emit_sig_cookie (cfg, call, cinfo);
}
/* Arguments are pushed in the reverse order */
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
/* Emit the signature cookie just before the implicit arguments */
- emit_sig_cookie2 (cfg, call, cinfo);
+ emit_sig_cookie (cfg, call, cinfo);
}
}
/* The return address is passed in a register */
MONO_INST_NEW (cfg, vtarg, OP_MOVE);
vtarg->sreg1 = call->inst.dreg;
- vtarg->dreg = mono_regstate_next_int (cfg->rs);
+ vtarg->dreg = mono_alloc_ireg (cfg);
MONO_ADD_INS (cfg->cbb, vtarg);
mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
MONO_ADD_INS (cfg->cbb, arg);
} else if (size <= 20) {
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
- mini_emit_memcpy2 (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
+ mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
} else {
MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
arg->inst_basereg = src->dreg;
{
guchar *code = p;
-#if __APPLE__
- x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
-#endif
+ g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
+ x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);
/* if some args are passed in registers, we need to save them here */
x86_push_reg (code, X86_EBP);
mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
x86_call_code (code, 0);
}
-#if __APPLE__
- x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
-#else
- x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
-#endif
+ x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);
return code;
}
{
MonoInst *ins, *next;
- if (bb->max_vreg > cfg->rs->next_vreg)
- cfg->rs->next_vreg = bb->max_vreg;
-
/*
* FIXME: Need to add more instructions, but the current machine
* description can't model some parts of the composite instructions like
}
}
- bb->max_vreg = cfg->rs->next_vreg;
+ bb->max_vreg = cfg->next_vreg;
}
static const int
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
- if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
+ /*This SSE2 optimization must not be done with OPT_SIMD in place, as it clobbers xmm0.*/
+ /*The xmm pass decomposes OP_FCONV_ ops anyway.*/
+ if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
/* optimize by assigning a local var for this use so we avoid
* the stack manipulations */
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
}
/*
- * emit_tls_get:
+ * mono_x86_emit_tls_get:
* @code: buffer to store code to
* @dreg: hard register where to place the result
* @tls_offset: offset info
*
- * emit_tls_get emits in @code the native code that puts in the dreg register
- * the item in the thread local storage identified by tls_offset.
+ * mono_x86_emit_tls_get emits in @code the native code that puts in
+ * the dreg register the item in the thread local storage identified
+ * by tls_offset.
*
* Returns: a pointer to the end of the stored code
*/
-static guint8*
-emit_tls_get (guint8* code, int dreg, int tls_offset)
+guint8*
+mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
/*
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
+#ifndef DISABLE_JIT
+
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
break;
case OP_LOADU4_MEM:
- if (cfg->new_ir)
- x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
- else
- x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
+ x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
break;
case OP_LOAD_MEM:
case OP_LOADI4_MEM:
case OP_BREAK:
x86_breakpoint (code);
break;
+ case OP_RELAXED_NOP:
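+ /* "rep; nop" encodes the PAUSE hint: a nop that is gentler in spin-wait loops */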
+ x86_prefix (code, X86_REP_PREFIX);
+ x86_nop (code);
+ break;
+ case OP_HARD_NOP:
+ x86_nop (code);
+ break;
case OP_NOP:
case OP_DUMMY_USE:
case OP_DUMMY_STORE:
}
code = emit_move_return_value (cfg, ins, code);
break;
- case OP_OUTARG:
case OP_X86_PUSH:
x86_push_reg (code, ins->sreg1);
break;
break;
}
case OP_CALL_HANDLER:
-#if __APPLE__
- x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
-#endif
+ x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
x86_call_imm (code, 0);
-#ifdef __APPLE__
- x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
-#endif
+ x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
break;
case OP_START_HANDLER: {
MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
case OP_SQRT:
x86_fsqrt (code);
break;
+ case OP_ROUND:
+ x86_frndint (code);
+ break;
case OP_IMIN:
g_assert (cfg->opt & MONO_OPT_CMOV);
g_assert (ins->dreg == ins->sreg1);
break;
}
case OP_TLS_GET: {
- code = emit_tls_get (code, ins->dreg, ins->inst_offset);
+ code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
break;
}
case OP_MEMORY_BARRIER: {
break;
}
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+ case OP_ADDPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DIVPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MULPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SUBPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MAXPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MINPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
+ break;
+ case OP_COMPPS:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SQRTPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
+ break;
+ case OP_RSQRTPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
+ break;
+ case OP_RCPPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
+ break;
+ case OP_ADDSUBPS:
+ x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HADDPS:
+ x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HSUBPS:
+ x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DUPPS_HIGH:
+ x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
+ break;
+ case OP_DUPPS_LOW:
+ x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_PSHUFLEW_HIGH:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
+ break;
+ case OP_PSHUFLEW_LOW:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
+ break;
+ case OP_PSHUFLED:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+
+ case OP_ADDPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DIVPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MULPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
+ break;
+ case OP_SUBPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MAXPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
+ break;
+ case OP_MINPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
+ break;
+ case OP_COMPPD:
+ g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_ANDPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ANDNPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ORPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
+ break;
+ case OP_XORPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
+ break;
+ case OP_ADDSUBPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HADDPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_HSUBPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_DUPPD:
+ x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_EXTRACT_MASK:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
+ break;
+
+ case OP_PAND:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
+ break;
+ case OP_POR:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PXOR:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUBB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMAXB:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMAXD:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PAVGB_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PAVGW_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMINB:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMINW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMIND:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPEQB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPEQQ:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PCMPGTB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PCMPGTQ:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSUM_ABS_DIFF:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_UNPACK_LOWB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_LOWPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_UNPACK_HIGHB:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPS:
+ x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
+ break;
+ case OP_UNPACK_HIGHPD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PACKW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKW_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PACKD_UN:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PADDB_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBB_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PADDW_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PSUBW_SAT:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PMULW:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULD:
+ x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULQ:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH_UN:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
+ break;
+ case OP_PMULW_HIGH:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
+ break;
+
+ case OP_PSHRW:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRW_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARW:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARW_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLW:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLW_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHRD:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRD_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSARD:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSARD_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLD:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLD_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHRQ:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHRQ_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_PSHLQ:
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
+ break;
+ case OP_PSHLQ_REG:
+ x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
+ break;
+
+ case OP_ICONV_TO_X:
+ x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_EXTRACT_I4:
+ x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_EXTRACT_I1:
+ case OP_EXTRACT_U1:
+ x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
+ if (ins->inst_c0)
+ x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
+ x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
+ break;
+ case OP_EXTRACT_I2:
+ case OP_EXTRACT_U2:
+ x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
+ if (ins->inst_c0)
+ x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
+ x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
+ break;
+ case OP_EXTRACT_R8:
+ if (ins->inst_c0)
+ x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
+ else
+ x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
+ x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
+ break;
+
+ case OP_INSERT_I2:
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
+ break;
+ case OP_EXTRACTX_U2:
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
+ break;
+ case OP_INSERTX_U1_SLOW:
+ /*sreg1 is the extracted ireg (scratch)
+  *sreg2 is the ireg to be inserted (scratch)
+  *dreg is the xreg to receive the value*/
+
+ /*clear the bits from the extracted word*/
+ x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
+ /*shift the value to insert if needed*/
+ if (ins->inst_c0 & 1)
+ x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
+ /*join them together*/
+ x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
+ break;
+ case OP_INSERTX_I4_SLOW:
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
+ x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
+ break;
+
+ case OP_INSERTX_R4_SLOW:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
+ /*TODO if inst_c0 == 0 use movss*/
+ x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
+ x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
+ break;
+ case OP_INSERTX_R8_SLOW:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
+ if (ins->inst_c0)
+ x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ else
+ x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ break;
+
+ case OP_STOREX_MEMBASE_REG:
+ case OP_STOREX_MEMBASE:
+ x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
+ break;
+ case OP_LOADX_MEMBASE:
+ x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_LOADX_ALIGNED_MEMBASE:
+ x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_STOREX_ALIGNED_MEMBASE_REG:
+ x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
+ break;
+ case OP_STOREX_NTA_MEMBASE_REG:
+ x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_PREFETCH_MEMBASE:
+ x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
+ break;
+ case OP_XMOVE:
+ /*FIXME the peephole pass should have killed this*/
+ if (ins->dreg != ins->sreg1)
+ x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
+ break;
+ case OP_XZERO:
+ x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
+ break;
+ case OP_ICONV_TO_R8_RAW:
+ x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
+ x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
+ break;
+
+ case OP_FCONV_TO_R8_X:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
+ x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ break;
+
+ case OP_XCONV_R8_TO_I4:
+ x86_cvttsd2si (code, ins->dreg, ins->sreg1);
+ switch (ins->backend.source_opcode) {
+ case OP_FCONV_TO_I1:
+ x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
+ break;
+ case OP_FCONV_TO_U1:
+ x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
+ break;
+ case OP_FCONV_TO_I2:
+ x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
+ break;
+ case OP_FCONV_TO_U2:
+ x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
+ break;
+ }
+ break;
+
+ case OP_EXPAND_I1:
+ /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
+ /*The +4 is to get a mov ?h, ?l over the same reg.*/
+ x86_mov_reg_reg (code, ins->dreg + 4, ins->dreg, 1);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I2:
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
+ x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_I4:
+ x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_R4:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
+ x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
+ break;
+ case OP_EXPAND_R8:
+ x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
+ x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
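+ /* 0x44 selects elements (0,1,0,1): duplicate the double into both halves */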
+ x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
+ break;
+#endif
default:
g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
g_assert_not_reached ();
cfg->code_len = code - cfg->native_code;
}
+#endif /* DISABLE_JIT */
+
void
mono_arch_register_lowlevel_calls (void)
{
case MONO_PATCH_INFO_LABEL:
case MONO_PATCH_INFO_RGCTX_FETCH:
case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
+ case MONO_PATCH_INFO_MONITOR_ENTER:
+ case MONO_PATCH_INFO_MONITOR_EXIT:
x86_patch (ip, target);
break;
case MONO_PATCH_INFO_NONE:
if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
guint8 *buf, *no_domain_branch;
- code = emit_tls_get (code, X86_EAX, appdomain_tls_offset);
+ code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
no_domain_branch = code;
x86_branch8 (code, X86_CC_NE, 0, 0);
- code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
+ code = mono_x86_emit_tls_get ( code, X86_EAX, lmf_tls_offset);
x86_test_reg_reg (code, X86_EAX, X86_EAX);
buf = code;
x86_branch8 (code, X86_CC_NE, 0, 0);
if (lmf_addr_tls_offset != -1) {
/* Load lmf quickly using the GS register */
- code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
+ code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
#ifdef PLATFORM_WIN32
/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
/* FIXME: Add a separate key for LMF to avoid this */
alloc_size -= pos;
-#if __APPLE__
/* the original alloc_size is already aligned: %ebp and the return ip were pushed, so realign */
- {
+ if (mono_do_x86_stack_align) {
int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
- if (tot & 4) {
- tot += 4;
- alloc_size += 4;
- }
- if (tot & 8) {
- alloc_size += 8;
- }
+ tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
+ alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
}
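+ /*
+ * Example (illustrative): with 16-byte alignment, tot == 24 masks down
+ * to 8, so 16 - 8 = 8 extra bytes restore the alignment.
+ */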
-#endif
if (alloc_size) {
/* See mono_emit_stack_alloc */
#endif
}
-#if __APPLE__ && DEBUG_APPLE_ALIGNMENT
+ if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
+ cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
+ x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
+ }
+
+#if DEBUG_STACK_ALIGNMENT
/* check the stack is aligned */
- x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
- x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
- x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
- x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
- x86_breakpoint (code);
+ if (method->wrapper_type == MONO_WRAPPER_NONE) {
+ x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
+ x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
+ x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
+ x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
+ x86_breakpoint (code);
+ }
#endif
/* compute max_offset in order to use short forward jumps */
gint32 prev_lmf_reg;
gint32 lmf_offset = -sizeof (MonoLMF);
+ /* check if we need to restore protection of the stack after a stack overflow */
+ if (mono_get_jit_tls_offset () != -1) {
+ guint8 *patch;
+ code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
+ /* we load the value in a separate instruction: this mechanism may be
+ * used later as a safer way to do thread interruption
+ */
+ x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
+ x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
+ patch = code;
+ x86_branch8 (code, X86_CC_Z, 0, FALSE);
+ /* note that the call trampoline will preserve eax/edx */
+ x86_call_reg (code, X86_ECX);
+ x86_patch (patch, code);
+ } else {
+ /* FIXME: maybe save the jit tls in the prolog */
+ }
if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
/*
* Optimized version which uses the mono_lmf TLS variable instead of indirection
{
}
-void
-mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
-{
- MonoCallInst *call = (MonoCallInst*)inst;
- CallInfo *cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, inst->signature, FALSE);
-
- /* add the this argument */
- if (this_reg != -1) {
- if (cinfo->args [0].storage == ArgInIReg) {
- MonoInst *this;
- MONO_INST_NEW (cfg, this, OP_MOVE);
- this->type = this_type;
- this->sreg1 = this_reg;
- this->dreg = mono_regstate_next_int (cfg->rs);
- mono_bblock_add_inst (cfg->cbb, this);
-
- mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
- }
- else {
- MonoInst *this;
- MONO_INST_NEW (cfg, this, OP_OUTARG);
- this->type = this_type;
- this->sreg1 = this_reg;
- mono_bblock_add_inst (cfg->cbb, this);
- }
- }
-
- if (vt_reg != -1) {
- MonoInst *vtarg;
-
- if (cinfo->ret.storage == ArgValuetypeInReg) {
- /*
- * The valuetype is in EAX:EDX after the call, needs to be copied to
- * the stack. Save the address here, so the call instruction can
- * access it.
- */
- MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
- vtarg->inst_destbasereg = X86_ESP;
- vtarg->inst_offset = inst->stack_usage;
- vtarg->sreg1 = vt_reg;
- mono_bblock_add_inst (cfg->cbb, vtarg);
- }
- else if (cinfo->ret.storage == ArgInIReg) {
- /* The return address is passed in a register */
- MONO_INST_NEW (cfg, vtarg, OP_MOVE);
- vtarg->sreg1 = vt_reg;
- vtarg->dreg = mono_regstate_next_int (cfg->rs);
- mono_bblock_add_inst (cfg->cbb, vtarg);
-
- mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
- } else {
- MonoInst *vtarg;
- MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
- vtarg->type = STACK_MP;
- vtarg->sreg1 = vt_reg;
- mono_bblock_add_inst (cfg->cbb, vtarg);
- }
- }
-}
-
#ifdef MONO_ARCH_HAVE_IMT
// Linear handler, the bsearch head compare is shorter
* LOCKING: called with the domain lock held
*/
gpointer
-mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
+mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
+ gpointer fail_tramp)
{
int i;
int size = 0;
item->chunk_size += CMP_SIZE;
item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
} else {
- item->chunk_size += JUMP_IMM_SIZE;
+ if (fail_tramp) {
+ item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
+ } else {
+ item->chunk_size += JUMP_IMM_SIZE;
#if ENABLE_WRONG_METHOD_CHECK
- item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
+ item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
#endif
+ }
}
} else {
item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
}
size += item->chunk_size;
}
- code = mono_code_manager_reserve (domain->code_mp, size);
+ if (fail_tramp)
+ code = mono_method_alloc_generic_virtual_thunk (domain, size);
+ else
+ code = mono_code_manager_reserve (domain->code_mp, size);
start = code;
for (i = 0; i < count; ++i) {
MonoIMTCheckItem *item = imt_entries [i];
if (item->is_equals) {
if (item->check_target_idx) {
if (!item->compare_done)
- x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
+ x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
item->jmp_code = code;
x86_branch8 (code, X86_CC_NE, 0, FALSE);
- x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
+ if (fail_tramp)
+ x86_jump_code (code, item->value.target_code);
+ else
+ x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
} else {
- /* enable the commented code to assert on wrong method */
+ if (fail_tramp) {
+ x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
+ item->jmp_code = code;
+ x86_branch8 (code, X86_CC_NE, 0, FALSE);
+ x86_jump_code (code, item->value.target_code);
+ x86_patch (item->jmp_code, code);
+ x86_jump_code (code, fail_tramp);
+ item->jmp_code = NULL;
+ } else {
+ /* enable the commented code to assert on wrong method */
#if ENABLE_WRONG_METHOD_CHECK
- x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
- item->jmp_code = code;
- x86_branch8 (code, X86_CC_NE, 0, FALSE);
+ x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
+ item->jmp_code = code;
+ x86_branch8 (code, X86_CC_NE, 0, FALSE);
#endif
- x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
+ x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
#if ENABLE_WRONG_METHOD_CHECK
- x86_patch (item->jmp_code, code);
- x86_breakpoint (code);
- item->jmp_code = NULL;
+ x86_patch (item->jmp_code, code);
+ x86_breakpoint (code);
+ item->jmp_code = NULL;
#endif
+ }
}
} else {
- x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
+ x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
item->jmp_code = code;
if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
x86_branch8 (code, X86_CC_GE, 0, FALSE);
}
}
}
-
- mono_stats.imt_thunks_size += code - start;
+
+ if (!fail_tramp)
+ mono_stats.imt_thunks_size += code - start;
g_assert (code - start <= size);
return start;
}
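+
+/*
+ * Shape of the code emitted for a terminal entry when fail_tramp is set
+ * (illustrative):
+ *
+ *   cmp  MONO_ARCH_IMT_REG, item->key
+ *   jne  miss
+ *   jmp  item->value.target_code
+ * miss:
+ *   jmp  fail_tramp
+ */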
return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
}
-MonoInst*
-mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
-{
- MonoInst *ins = NULL;
-
- if (cmethod->klass == mono_defaults.math_class) {
- if (strcmp (cmethod->name, "Sin") == 0) {
- MONO_INST_NEW (cfg, ins, OP_SIN);
- ins->inst_i0 = args [0];
- } else if (strcmp (cmethod->name, "Cos") == 0) {
- MONO_INST_NEW (cfg, ins, OP_COS);
- ins->inst_i0 = args [0];
- } else if (strcmp (cmethod->name, "Tan") == 0) {
- MONO_INST_NEW (cfg, ins, OP_TAN);
- ins->inst_i0 = args [0];
- } else if (strcmp (cmethod->name, "Atan") == 0) {
- MONO_INST_NEW (cfg, ins, OP_ATAN);
- ins->inst_i0 = args [0];
- } else if (strcmp (cmethod->name, "Sqrt") == 0) {
- MONO_INST_NEW (cfg, ins, OP_SQRT);
- ins->inst_i0 = args [0];
- } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
- MONO_INST_NEW (cfg, ins, OP_ABS);
- ins->inst_i0 = args [0];
- }
-
- if (cfg->opt & MONO_OPT_CMOV) {
- int opcode = 0;
-
- if (strcmp (cmethod->name, "Min") == 0) {
- if (fsig->params [0]->type == MONO_TYPE_I4)
- opcode = OP_IMIN;
- else if (fsig->params [0]->type == MONO_TYPE_U4)
- opcode = OP_IMIN_UN;
- } else if (strcmp (cmethod->name, "Max") == 0) {
- if (fsig->params [0]->type == MONO_TYPE_I4)
- opcode = OP_IMAX;
- else if (fsig->params [0]->type == MONO_TYPE_U4)
- opcode = OP_IMAX_UN;
- }
-
- if (opcode) {
- MONO_INST_NEW (cfg, ins, opcode);
- ins->inst_i0 = args [0];
- ins->inst_i1 = args [1];
- }
- }
-
-#if 0
- /* OP_FREM is not IEEE compatible */
- else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
- MONO_INST_NEW (cfg, ins, OP_FREM);
- ins->inst_i0 = args [0];
- ins->inst_i1 = args [1];
- }
-#endif
- }
-
- return ins;
-}
-
MonoInst*
mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
opcode = OP_SQRT;
} else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
opcode = OP_ABS;
+ } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
+ opcode = OP_ROUND;
}
if (opcode) {
default: return ((gpointer)(&ctx->eax)[reg]);
}
}
+
+#ifdef MONO_ARCH_SIMD_INTRINSICS
+
+static MonoInst*
+get_float_to_x_spill_area (MonoCompile *cfg)
+{
+ if (!cfg->fconv_to_r8_x_var) {
+ cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
+ cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
+ }
+ return cfg->fconv_to_r8_x_var;
+}
+
+/*
+ * Convert all fconv opts that MONO_OPT_SSE2 would get wrong.
+ */
+void
+mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
+{
+ MonoInst *fconv;
+ int dreg, src_opcode;
+
+ if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD))
+ return;
+
+ switch (src_opcode = ins->opcode) {
+ case OP_FCONV_TO_I1:
+ case OP_FCONV_TO_U1:
+ case OP_FCONV_TO_I2:
+ case OP_FCONV_TO_U2:
+ case OP_FCONV_TO_I4:
+ case OP_FCONV_TO_I:
+ break;
+ default:
+ return;
+ }
+
+ /* dreg is the IREG and sreg1 is the FREG */
+ MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
+ fconv->klass = NULL; /*FIXME, what can I use here as the Mono.Simd lib might not be loaded yet*/
+ fconv->sreg1 = ins->sreg1;
+ fconv->dreg = mono_alloc_ireg (cfg);
+ fconv->type = STACK_VTYPE;
+ fconv->backend.spill_var = get_float_to_x_spill_area (cfg);
+
+ mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);
+
+ dreg = ins->dreg;
+ NULLIFY_INS (ins);
+ ins->opcode = OP_XCONV_R8_TO_I4;
+
+ ins->klass = mono_defaults.int32_class;
+ ins->sreg1 = fconv->dreg;
+ ins->dreg = dreg;
+ ins->type = STACK_I4;
+ ins->backend.source_opcode = src_opcode;
+}
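+
+/*
+ * Illustrative decomposition (per the code above): a single
+ *
+ *   OP_FCONV_TO_I2 ireg <- freg
+ *
+ * becomes
+ *
+ *   OP_FCONV_TO_R8_X  xreg <- freg   (spilled through fconv_to_r8_x_var)
+ *   OP_XCONV_R8_TO_I4 ireg <- xreg   (backend.source_opcode remembers
+ *                                     OP_FCONV_TO_I2, so the result is
+ *                                     sign-extended from 16 bits)
+ */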
+
+void
+mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
+{
+ MonoInst *ins;
+ int vreg;
+ if (!(cfg->opt & MONO_OPT_SIMD))
+ return;
+
+ /*TODO move this to simd-intrinsic.c once we support sse 4.1 dword extractors since we need the runtime caps info */
+ switch (long_ins->opcode) {
+ case OP_EXTRACT_I8:
+ vreg = long_ins->sreg1;
+
+ if (long_ins->inst_c0) {
+ MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
+ ins->klass = long_ins->klass;
+ ins->sreg1 = long_ins->sreg1;
+ ins->inst_c0 = 2;
+ ins->type = STACK_VTYPE;
+ ins->dreg = vreg = alloc_ireg (cfg);
+ MONO_ADD_INS (cfg->cbb, ins);
+ }
+
+ MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
+ ins->klass = mono_defaults.int32_class;
+ ins->sreg1 = vreg;
+ ins->type = STACK_I4;
+ ins->dreg = long_ins->dreg + 1;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
+ ins->klass = long_ins->klass;
+ ins->sreg1 = long_ins->sreg1;
+ ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
+ ins->type = STACK_VTYPE;
+ ins->dreg = vreg = alloc_ireg (cfg);
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
+ ins->klass = mono_defaults.int32_class;
+ ins->sreg1 = vreg;
+ ins->type = STACK_I4;
+ ins->dreg = long_ins->dreg + 2;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ long_ins->opcode = OP_NOP;
+ break;
+ case OP_INSERTX_I8_SLOW:
+ MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->sreg2 = long_ins->sreg2 + 1;
+ ins->inst_c0 = long_ins->inst_c0 * 2;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->sreg2 = long_ins->sreg2 + 2;
+ ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ long_ins->opcode = OP_NOP;
+ break;
+ case OP_EXPAND_I8:
+ MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->sreg1 + 1;
+ ins->klass = long_ins->klass;
+ ins->type = STACK_VTYPE;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->sreg2 = long_ins->sreg1 + 2;
+ ins->inst_c0 = 1;
+ ins->klass = long_ins->klass;
+ ins->type = STACK_VTYPE;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
+ ins->dreg = long_ins->dreg;
+ ins->sreg1 = long_ins->dreg;
+ ins->inst_c0 = 0x44; /*Magic number for swizzling (X,Y,X,Y)*/
+ ins->klass = long_ins->klass;
+ ins->type = STACK_VTYPE;
+ MONO_ADD_INS (cfg->cbb, ins);
+
+ long_ins->opcode = OP_NOP;
+ break;
+ }
+}
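+
+/*
+ * Illustrative expansion (per the code above): OP_EXTRACT_I8 with
+ * inst_c0 == 1 (the upper long of the vector) becomes
+ *
+ *   OP_PSHUFLED   t <- src, 2      (element 2 into lane 0)
+ *   OP_EXTRACT_I4 dreg+1 <- t      (low 32 bits of the long)
+ *   OP_PSHUFLED   t <- src, 3
+ *   OP_EXTRACT_I4 dreg+2 <- t      (high 32 bits of the long)
+ */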
+#endif
+