X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Fmini-amd64.c;h=b33714554e12e8b7f4837f7a2ac08bc97728e433;hb=852c512d416efb6ed34493571ee87abab7f72890;hp=d57e7c89c392bd0e3dbd7006be2b9676f8e2dce3;hpb=df4276338409c92fc7109acfaa82d12f83021f99;p=mono.git diff --git a/mono/mini/mini-amd64.c b/mono/mini/mini-amd64.c index d57e7c89c39..b33714554e1 100644 --- a/mono/mini/mini-amd64.c +++ b/mono/mini/mini-amd64.c @@ -30,6 +30,12 @@ #include "inssel.h" #include "cpu-amd64.h" +/* + * Can't define this in mini-amd64.h cause that would turn on the generic code in + * method-to-ir.c. + */ +#define MONO_ARCH_IMT_REG AMD64_R11 + static gint lmf_tls_offset = -1; static gint lmf_addr_tls_offset = -1; static gint appdomain_tls_offset = -1; @@ -41,8 +47,6 @@ static gboolean optimize_for_xen = TRUE; #define optimize_for_xen 0 #endif -static gboolean use_sse2 = !MONO_ARCH_USE_FPSTACK; - #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1)) #define IS_IMM32(val) ((((guint64)val) >> 32) == 0) @@ -50,8 +54,8 @@ static gboolean use_sse2 = !MONO_ARCH_USE_FPSTACK; #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f)) #ifdef PLATFORM_WIN32 -/* Under windows, the default pinvoke calling convention is stdcall */ -#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT)) +/* Under windows, the calling convention is never stdcall */ +#define CALLCONV_IS_STDCALL(call_conv) (FALSE) #else #define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL) #endif @@ -64,7 +68,12 @@ static CRITICAL_SECTION mini_arch_mutex; MonoBreakpointInfo mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE]; +#ifdef PLATFORM_WIN32 +/* On Win64 always reserve first 32 bytes for first four arguments */ +#define ARGS_OFFSET 48 +#else #define ARGS_OFFSET 16 +#endif #define GP_SCRATCH_REG AMD64_R11 /* @@ -84,6 +93,8 @@ mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE]; * UNORDERED 1 1 1 */ +void mini_emit_memcpy2 (MonoCompile *cfg, int destreg, int doffset, int srcreg, int soffset, int size, int align); + const char* mono_arch_regname (int reg) { @@ -170,15 +181,19 @@ amd64_is_near_call (guint8 *code) static inline void amd64_patch (unsigned char* code, gpointer target) { + guint8 rex = 0; + /* Skip REX */ - if ((code [0] >= 0x40) && (code [0] <= 0x4f)) + if ((code [0] >= 0x40) && (code [0] <= 0x4f)) { + rex = code [0]; code += 1; + } if ((code [0] & 0xf8) == 0xb8) { /* amd64_set_reg_template */ *(guint64*)(code + 1) = (guint64)target; } - else if (code [0] == 0x8b) { + else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) { /* mov 0(%rip), %dreg */ *(guint32*)(code + 2) = (guint32)(guint64)target - 7; } @@ -208,6 +223,7 @@ typedef enum { ArgInDoubleSSEReg, ArgOnStack, ArgValuetypeInReg, + ArgValuetypeAddrInIReg, ArgNone /* only in pair_storage */ } ArgStorage; @@ -310,7 +326,7 @@ merge_argument_class_from_type (MonoType *type, ArgumentClass class1) ArgumentClass class2 = ARG_CLASS_NO_CLASS; MonoType *ptype; - ptype = mono_type_get_underlying_type (type); + ptype = mini_type_get_underlying_type (NULL, type); switch (ptype->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_CHAR: @@ -335,7 +351,11 @@ merge_argument_class_from_type (MonoType *type, ArgumentClass class1) break; case MONO_TYPE_R4: case MONO_TYPE_R8: +#ifdef PLATFORM_WIN32 + class2 = ARG_CLASS_INTEGER; +#else class2 = ARG_CLASS_SSE; +#endif break; case MONO_TYPE_TYPEDBYREF: @@ -383,16 +403,27 @@ add_valuetype 
(MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn { guint32 size, quad, nquads, i; ArgumentClass args [2]; - MonoMarshalType *info; + MonoMarshalType *info = NULL; MonoClass *klass; + MonoGenericSharingContext tmp_gsctx; - klass = mono_class_from_mono_type (type); - if (sig->pinvoke) - size = mono_type_native_stack_size (&klass->byval_arg, NULL); - else - size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL); + /* + * The gsctx currently contains no data, it is only used for checking whenever + * open types are allowed, some callers like mono_arch_get_argument_info () + * don't pass it to us, so work around that. + */ + if (!gsctx) + gsctx = &tmp_gsctx; - if (!sig->pinvoke || (size == 0) || (size > 16)) { + klass = mono_class_from_mono_type (type); + size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke); +#ifndef PLATFORM_WIN32 + if (!sig->pinvoke && !disable_vtypes_in_regs && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) { + /* We pass and return vtypes of size 8 in a register */ + } else if (!sig->pinvoke || (size == 0) || (size > 16)) { +#else + if (!sig->pinvoke) { +#endif /* Allways pass in memory */ ainfo->offset = *stack_size; *stack_size += ALIGN_TO (size, 8); @@ -410,48 +441,92 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn else nquads = 1; - /* - * Implement the algorithm from section 3.2.3 of the X86_64 ABI. - * The X87 and SSEUP stuff is left out since there are no such types in - * the CLR. - */ - info = mono_marshal_load_type_info (klass); - g_assert (info); - if (info->native_size > 16) { - ainfo->offset = *stack_size; - *stack_size += ALIGN_TO (info->native_size, 8); - ainfo->storage = ArgOnStack; + if (!sig->pinvoke) { + /* Always pass in 1 or 2 integer registers */ + args [0] = ARG_CLASS_INTEGER; + args [1] = ARG_CLASS_INTEGER; + /* Only the simplest cases are supported */ + if (is_return && nquads != 1) { + args [0] = ARG_CLASS_MEMORY; + args [1] = ARG_CLASS_MEMORY; + } + } else { + /* + * Implement the algorithm from section 3.2.3 of the X86_64 ABI. + * The X87 and SSEUP stuff is left out since there are no such types in + * the CLR. 
+ */ + info = mono_marshal_load_type_info (klass); + g_assert (info); - return; - } +#ifndef PLATFORM_WIN32 + if (info->native_size > 16) { + ainfo->offset = *stack_size; + *stack_size += ALIGN_TO (info->native_size, 8); + ainfo->storage = ArgOnStack; - args [0] = ARG_CLASS_NO_CLASS; - args [1] = ARG_CLASS_NO_CLASS; - for (quad = 0; quad < nquads; ++quad) { - int size; - guint32 align; - ArgumentClass class1; - - class1 = ARG_CLASS_NO_CLASS; - for (i = 0; i < info->num_fields; ++i) { - size = mono_marshal_type_size (info->fields [i].field->type, - info->fields [i].mspec, - &align, TRUE, klass->unicode); - if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) { - /* Unaligned field */ - NOT_IMPLEMENTED; + return; + } +#else + switch (info->native_size) { + case 1: case 2: case 4: case 8: + break; + default: + if (is_return) { + ainfo->storage = ArgOnStack; + ainfo->offset = *stack_size; + *stack_size += ALIGN_TO (info->native_size, 8); } + else { + ainfo->storage = ArgValuetypeAddrInIReg; - /* Skip fields in other quad */ - if ((quad == 0) && (info->fields [i].offset >= 8)) - continue; - if ((quad == 1) && (info->fields [i].offset < 8)) - continue; + if (*gr < PARAM_REGS) { + ainfo->pair_storage [0] = ArgInIReg; + ainfo->pair_regs [0] = param_regs [*gr]; + (*gr) ++; + } + else { + ainfo->pair_storage [0] = ArgOnStack; + ainfo->offset = *stack_size; + *stack_size += 8; + } + } + + return; + } +#endif + + args [0] = ARG_CLASS_NO_CLASS; + args [1] = ARG_CLASS_NO_CLASS; + for (quad = 0; quad < nquads; ++quad) { + int size; + guint32 align; + ArgumentClass class1; + + if (info->num_fields == 0) + class1 = ARG_CLASS_MEMORY; + else + class1 = ARG_CLASS_NO_CLASS; + for (i = 0; i < info->num_fields; ++i) { + size = mono_marshal_type_size (info->fields [i].field->type, + info->fields [i].mspec, + &align, TRUE, klass->unicode); + if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) { + /* Unaligned field */ + NOT_IMPLEMENTED; + } + + /* Skip fields in other quad */ + if ((quad == 0) && (info->fields [i].offset >= 8)) + continue; + if ((quad == 1) && (info->fields [i].offset < 8)) + continue; - class1 = merge_argument_class_from_type (info->fields [i].field->type, class1); + class1 = merge_argument_class_from_type (info->fields [i].field->type, class1); + } + g_assert (class1 != ARG_CLASS_NO_CLASS); + args [quad] = class1; } - g_assert (class1 != ARG_CLASS_NO_CLASS); - args [quad] = class1; } /* Post merger cleanup */ @@ -501,7 +576,10 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn *fr = orig_fr; ainfo->offset = *stack_size; - *stack_size += ALIGN_TO (info->native_size, 8); + if (sig->pinvoke) + *stack_size += ALIGN_TO (info->native_size, 8); + else + *stack_size += nquads * sizeof (gpointer); ainfo->storage = ArgOnStack; } } @@ -515,14 +593,13 @@ add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgIn * Draft Version 0.23" document for more information. */ static CallInfo* -get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke) +get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke) { guint32 i, gr, fr; MonoType *ret_type; int n = sig->hasthis + sig->param_count; guint32 stack_size = 0; CallInfo *cinfo; - MonoGenericSharingContext *gsctx = cfg ? 
cfg->generic_sharing_context : NULL; if (mp) cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n)); @@ -534,8 +611,7 @@ get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboo /* return value */ { - ret_type = mono_type_get_underlying_type (sig->ret); - ret_type = mini_get_basic_type_from_generic (gsctx, ret_type); + ret_type = mini_type_get_underlying_type (gsctx, sig->ret); switch (ret_type->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_I1: @@ -614,6 +690,14 @@ get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboo ArgInfo *ainfo = &cinfo->args [sig->hasthis + i]; MonoType *ptype; +#ifdef PLATFORM_WIN32 + /* The float param registers and other param registers must be the same index on Windows x64.*/ + if (gr > fr) + fr = gr; + else if (fr > gr) + gr = fr; +#endif + if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) { /* We allways pass the sig cookie on the stack for simplicity */ /* @@ -631,8 +715,7 @@ get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboo add_general (&gr, &stack_size, ainfo); continue; } - ptype = mono_type_get_underlying_type (sig->params [i]); - ptype = mini_get_basic_type_from_generic (gsctx, ptype); + ptype = mini_type_get_underlying_type (gsctx, sig->params [i]); switch (ptype->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_I1: @@ -669,8 +752,12 @@ get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboo add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size); break; case MONO_TYPE_TYPEDBYREF: +#ifdef PLATFORM_WIN32 + add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size); +#else stack_size += sizeof (MonoTypedRef); ainfo->storage = ArgOnStack; +#endif break; case MONO_TYPE_U8: case MONO_TYPE_I8: @@ -696,10 +783,8 @@ get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboo } #ifdef PLATFORM_WIN32 - if (stack_size < 32) { - /* The Win64 ABI requires 32 bits */ - stack_size = 32; - } + // There always is 32 bytes reserved on the stack when calling on Winx64 + stack_size += 0x20; #endif if (stack_size & 0x8) { @@ -751,9 +836,18 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit static int cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx) { +#ifndef _MSC_VER __asm__ __volatile__ ("cpuid" : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx) : "a" (id)); +#else + int info[4]; + __cpuid(info, id); + *p_eax = info[0]; + *p_ebx = info[1]; + *p_ecx = info[2]; + *p_edx = info[3]; +#endif return 1; } @@ -773,7 +867,9 @@ mono_arch_cpu_init (void) __asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw)); __asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw)); #else - _control87 (_PC_53, MCW_PC); + /* TODO: This is crashing on Win64 right now. 
+ * _control87 (_PC_53, MCW_PC); + */ #endif } @@ -818,13 +914,8 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask) } else *exclude_mask |= MONO_OPT_CMOV; } - return opts; -} -gboolean -mono_amd64_is_sse2 (void) -{ - return use_sse2; + return opts; } GList * @@ -878,7 +969,7 @@ mono_arch_compute_omit_fp (MonoCompile *cfg) sig = mono_method_signature (cfg->method); if (!cfg->arch.cinfo) - cfg->arch.cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE); + cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE); cinfo = cfg->arch.cinfo; /* @@ -887,9 +978,7 @@ mono_arch_compute_omit_fp (MonoCompile *cfg) cfg->arch.omit_fp = TRUE; cfg->arch.omit_fp_computed = TRUE; - /* Temporarily disable this when running in the debugger until we have support - * for this in the debugger. */ - if (mono_debug_using_mono_debugger ()) + if (cfg->disable_omit_fp) cfg->arch.omit_fp = FALSE; if (!debug_omit_fp ()) @@ -942,19 +1031,100 @@ mono_arch_get_global_int_regs (MonoCompile *cfg) mono_arch_compute_omit_fp (cfg); - if (cfg->arch.omit_fp) - regs = g_list_prepend (regs, (gpointer)AMD64_RBP); + if (cfg->globalra) { + if (cfg->arch.omit_fp) + regs = g_list_prepend (regs, (gpointer)AMD64_RBP); + + regs = g_list_prepend (regs, (gpointer)AMD64_RBX); + regs = g_list_prepend (regs, (gpointer)AMD64_R12); + regs = g_list_prepend (regs, (gpointer)AMD64_R13); + regs = g_list_prepend (regs, (gpointer)AMD64_R14); + regs = g_list_prepend (regs, (gpointer)AMD64_R15); + + regs = g_list_prepend (regs, (gpointer)AMD64_R10); + regs = g_list_prepend (regs, (gpointer)AMD64_R9); + regs = g_list_prepend (regs, (gpointer)AMD64_R8); + regs = g_list_prepend (regs, (gpointer)AMD64_RDI); + regs = g_list_prepend (regs, (gpointer)AMD64_RSI); + regs = g_list_prepend (regs, (gpointer)AMD64_RDX); + regs = g_list_prepend (regs, (gpointer)AMD64_RCX); + regs = g_list_prepend (regs, (gpointer)AMD64_RAX); + } else { + if (cfg->arch.omit_fp) + regs = g_list_prepend (regs, (gpointer)AMD64_RBP); + + /* We use the callee saved registers for global allocation */ + regs = g_list_prepend (regs, (gpointer)AMD64_RBX); + regs = g_list_prepend (regs, (gpointer)AMD64_R12); + regs = g_list_prepend (regs, (gpointer)AMD64_R13); + regs = g_list_prepend (regs, (gpointer)AMD64_R14); + regs = g_list_prepend (regs, (gpointer)AMD64_R15); + } + + return regs; +} + +GList* +mono_arch_get_global_fp_regs (MonoCompile *cfg) +{ + GList *regs = NULL; + int i; - /* We use the callee saved registers for global allocation */ - regs = g_list_prepend (regs, (gpointer)AMD64_RBX); - regs = g_list_prepend (regs, (gpointer)AMD64_R12); - regs = g_list_prepend (regs, (gpointer)AMD64_R13); - regs = g_list_prepend (regs, (gpointer)AMD64_R14); - regs = g_list_prepend (regs, (gpointer)AMD64_R15); + /* All XMM registers */ + for (i = 0; i < 16; ++i) + regs = g_list_prepend (regs, GINT_TO_POINTER (i)); return regs; } +GList* +mono_arch_get_iregs_clobbered_by_call (MonoCallInst *call) +{ + static GList *r = NULL; + + if (r == NULL) { + GList *regs = NULL; + + regs = g_list_prepend (regs, (gpointer)AMD64_RBP); + regs = g_list_prepend (regs, (gpointer)AMD64_RBX); + regs = g_list_prepend (regs, (gpointer)AMD64_R12); + regs = g_list_prepend (regs, (gpointer)AMD64_R13); + regs = g_list_prepend (regs, (gpointer)AMD64_R14); + regs = g_list_prepend (regs, (gpointer)AMD64_R15); + + regs = g_list_prepend (regs, (gpointer)AMD64_R10); + regs = g_list_prepend (regs, (gpointer)AMD64_R9); + regs = g_list_prepend (regs, (gpointer)AMD64_R8); + regs = g_list_prepend (regs, 
(gpointer)AMD64_RDI); + regs = g_list_prepend (regs, (gpointer)AMD64_RSI); + regs = g_list_prepend (regs, (gpointer)AMD64_RDX); + regs = g_list_prepend (regs, (gpointer)AMD64_RCX); + regs = g_list_prepend (regs, (gpointer)AMD64_RAX); + + InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL); + } + + return r; +} + +GList* +mono_arch_get_fregs_clobbered_by_call (MonoCallInst *call) +{ + int i; + static GList *r = NULL; + + if (r == NULL) { + GList *regs = NULL; + + for (i = 0; i < AMD64_XMM_NREG; ++i) + regs = g_list_prepend (regs, GINT_TO_POINTER (MONO_MAX_IREGS + i)); + + InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL); + } + + return r; +} + /* * mono_arch_regalloc_cost: * @@ -975,13 +1145,97 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv) /* push+pop */ return (ins->opcode == OP_ARG) ? 1 : 2; } + +/* + * mono_arch_fill_argument_info: + * + * Populate cfg->args, cfg->ret and cfg->vret_addr with information about the arguments + * of the method. + */ +void +mono_arch_fill_argument_info (MonoCompile *cfg) +{ + MonoMethodSignature *sig; + MonoMethodHeader *header; + MonoInst *ins; + int i; + CallInfo *cinfo; + + header = mono_method_get_header (cfg->method); + + sig = mono_method_signature (cfg->method); + + cinfo = cfg->arch.cinfo; + + /* + * Contrary to mono_arch_allocate_vars (), the information should describe + * where the arguments are at the beginning of the method, not where they can be + * accessed during the execution of the method. The later makes no sense for the + * global register allocator, since a variable can be in more than one location. + */ + if (sig->ret->type != MONO_TYPE_VOID) { + switch (cinfo->ret.storage) { + case ArgInIReg: + case ArgInFloatSSEReg: + case ArgInDoubleSSEReg: + if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) { + cfg->vret_addr->opcode = OP_REGVAR; + cfg->vret_addr->inst_c0 = cinfo->ret.reg; + } + else { + cfg->ret->opcode = OP_REGVAR; + cfg->ret->inst_c0 = cinfo->ret.reg; + } + break; + case ArgValuetypeInReg: + cfg->ret->opcode = OP_REGOFFSET; + cfg->ret->inst_basereg = -1; + cfg->ret->inst_offset = -1; + break; + default: + g_assert_not_reached (); + } + } + + for (i = 0; i < sig->param_count + sig->hasthis; ++i) { + ArgInfo *ainfo = &cinfo->args [i]; + MonoType *arg_type; + + ins = cfg->args [i]; + + if (sig->hasthis && (i == 0)) + arg_type = &mono_defaults.object_class->byval_arg; + else + arg_type = sig->params [i - sig->hasthis]; + + switch (ainfo->storage) { + case ArgInIReg: + case ArgInFloatSSEReg: + case ArgInDoubleSSEReg: + ins->opcode = OP_REGVAR; + ins->inst_c0 = ainfo->reg; + break; + case ArgOnStack: + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = -1; + ins->inst_offset = -1; + break; + case ArgValuetypeInReg: + /* Dummy */ + ins->opcode = OP_NOP; + break; + default: + g_assert_not_reached (); + } + } +} void mono_arch_allocate_vars (MonoCompile *cfg) { MonoMethodSignature *sig; MonoMethodHeader *header; - MonoInst *inst; + MonoInst *ins; int i, offset; guint32 locals_stack_size, locals_stack_align; gint32 *offsets; @@ -1023,6 +1277,8 @@ mono_arch_allocate_vars (MonoCompile *cfg) cfg->arch.lmf_offset = -offset; } } else { + if (cfg->arch.omit_fp) + cfg->arch.reg_save_area_offset = offset; /* Reserve space for caller saved registers */ for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) { @@ -1036,19 +1292,24 @@ mono_arch_allocate_vars (MonoCompile *cfg) case 
ArgInFloatSSEReg: case ArgInDoubleSSEReg: if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) { - /* The register is volatile */ - cfg->vret_addr->opcode = OP_REGOFFSET; - cfg->vret_addr->inst_basereg = cfg->frame_reg; - if (cfg->arch.omit_fp) { - cfg->vret_addr->inst_offset = offset; - offset += 8; + if (cfg->globalra) { + cfg->vret_addr->opcode = OP_REGVAR; + cfg->vret_addr->inst_c0 = cinfo->ret.reg; } else { - offset += 8; - cfg->vret_addr->inst_offset = -offset; - } - if (G_UNLIKELY (cfg->verbose_level > 1)) { - printf ("vret_addr ="); - mono_print_ins (cfg->vret_addr); + /* The register is volatile */ + cfg->vret_addr->opcode = OP_REGOFFSET; + cfg->vret_addr->inst_basereg = cfg->frame_reg; + if (cfg->arch.omit_fp) { + cfg->vret_addr->inst_offset = offset; + offset += 8; + } else { + offset += 8; + cfg->vret_addr->inst_offset = -offset; + } + if (G_UNLIKELY (cfg->verbose_level > 1)) { + printf ("vret_addr ="); + mono_print_ins (cfg->vret_addr); + } } } else { @@ -1071,28 +1332,31 @@ mono_arch_allocate_vars (MonoCompile *cfg) default: g_assert_not_reached (); } - cfg->ret->dreg = cfg->ret->inst_c0; + if (!cfg->globalra) + cfg->ret->dreg = cfg->ret->inst_c0; } /* Allocate locals */ - offsets = mono_allocate_stack_slots_full (cfg, cfg->arch.omit_fp ? FALSE: TRUE, &locals_stack_size, &locals_stack_align); - if (locals_stack_align) { - offset += (locals_stack_align - 1); - offset &= ~(locals_stack_align - 1); - } - for (i = cfg->locals_start; i < cfg->num_varinfo; i++) { - if (offsets [i] != -1) { - MonoInst *inst = cfg->varinfo [i]; - inst->opcode = OP_REGOFFSET; - inst->inst_basereg = cfg->frame_reg; - if (cfg->arch.omit_fp) - inst->inst_offset = (offset + offsets [i]); - else - inst->inst_offset = - (offset + offsets [i]); - //printf ("allocated local %d to ", i); mono_print_tree_nl (inst); + if (!cfg->globalra) { + offsets = mono_allocate_stack_slots_full (cfg, cfg->arch.omit_fp ? 
FALSE: TRUE, &locals_stack_size, &locals_stack_align); + if (locals_stack_align) { + offset += (locals_stack_align - 1); + offset &= ~(locals_stack_align - 1); } + for (i = cfg->locals_start; i < cfg->num_varinfo; i++) { + if (offsets [i] != -1) { + MonoInst *ins = cfg->varinfo [i]; + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + if (cfg->arch.omit_fp) + ins->inst_offset = (offset + offsets [i]); + else + ins->inst_offset = - (offset + offsets [i]); + //printf ("allocated local %d to ", i); mono_print_tree_nl (ins); + } + } + offset += locals_stack_size; } - offset += locals_stack_size; if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) { g_assert (!cfg->arch.omit_fp); @@ -1101,8 +1365,8 @@ mono_arch_allocate_vars (MonoCompile *cfg) } for (i = 0; i < sig->param_count + sig->hasthis; ++i) { - inst = cfg->args [i]; - if (inst->opcode != OP_REGVAR) { + ins = cfg->args [i]; + if (ins->opcode != OP_REGVAR) { ArgInfo *ainfo = &cinfo->args [i]; gboolean inreg = TRUE; MonoType *arg_type; @@ -1112,8 +1376,43 @@ mono_arch_allocate_vars (MonoCompile *cfg) else arg_type = sig->params [i - sig->hasthis]; + if (cfg->globalra) { + /* The new allocator needs info about the original locations of the arguments */ + switch (ainfo->storage) { + case ArgInIReg: + case ArgInFloatSSEReg: + case ArgInDoubleSSEReg: + ins->opcode = OP_REGVAR; + ins->inst_c0 = ainfo->reg; + break; + case ArgOnStack: + g_assert (!cfg->arch.omit_fp); + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = ainfo->offset + ARGS_OFFSET; + break; + case ArgValuetypeInReg: + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + /* These arguments are saved to the stack in the prolog */ + offset = ALIGN_TO (offset, sizeof (gpointer)); + if (cfg->arch.omit_fp) { + ins->inst_offset = offset; + offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer); + } else { + offset += (ainfo->storage == ArgValuetypeInReg) ? 
2 * sizeof (gpointer) : sizeof (gpointer); + ins->inst_offset = - offset; + } + break; + default: + g_assert_not_reached (); + } + + continue; + } + /* FIXME: Allocate volatile arguments to registers */ - if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) + if (ins->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) inreg = FALSE; /* @@ -1124,38 +1423,62 @@ mono_arch_allocate_vars (MonoCompile *cfg) if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg)) inreg = FALSE; - inst->opcode = OP_REGOFFSET; + ins->opcode = OP_REGOFFSET; switch (ainfo->storage) { case ArgInIReg: case ArgInFloatSSEReg: case ArgInDoubleSSEReg: - inst->opcode = OP_REGVAR; - inst->dreg = ainfo->reg; + if (inreg) { + ins->opcode = OP_REGVAR; + ins->dreg = ainfo->reg; + } break; case ArgOnStack: g_assert (!cfg->arch.omit_fp); - inst->opcode = OP_REGOFFSET; - inst->inst_basereg = cfg->frame_reg; - inst->inst_offset = ainfo->offset + ARGS_OFFSET; + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; + ins->inst_offset = ainfo->offset + ARGS_OFFSET; break; case ArgValuetypeInReg: break; + case ArgValuetypeAddrInIReg: { + MonoInst *indir; + g_assert (!cfg->arch.omit_fp); + + MONO_INST_NEW (cfg, indir, 0); + indir->opcode = OP_REGOFFSET; + if (ainfo->pair_storage [0] == ArgInIReg) { + indir->inst_basereg = cfg->frame_reg; + offset = ALIGN_TO (offset, sizeof (gpointer)); + offset += (sizeof (gpointer)); + indir->inst_offset = - offset; + } + else { + indir->inst_basereg = cfg->frame_reg; + indir->inst_offset = ainfo->offset + ARGS_OFFSET; + } + + ins->opcode = OP_VTARG_ADDR; + ins->inst_left = indir; + + break; + } default: NOT_IMPLEMENTED; } - if (!inreg && (ainfo->storage != ArgOnStack)) { - inst->opcode = OP_REGOFFSET; - inst->inst_basereg = cfg->frame_reg; + if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg)) { + ins->opcode = OP_REGOFFSET; + ins->inst_basereg = cfg->frame_reg; /* These arguments are saved to the stack in the prolog */ offset = ALIGN_TO (offset, sizeof (gpointer)); if (cfg->arch.omit_fp) { - inst->inst_offset = offset; + ins->inst_offset = offset; offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer); } else { offset += (ainfo->storage == ArgValuetypeInReg) ? 
2 * sizeof (gpointer) : sizeof (gpointer); - inst->inst_offset = - offset; + ins->inst_offset = - offset; } } } @@ -1173,7 +1496,7 @@ mono_arch_create_vars (MonoCompile *cfg) sig = mono_method_signature (cfg->method); if (!cfg->arch.cinfo) - cfg->arch.cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE); + cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE); cinfo = cfg->arch.cinfo; if (cinfo->ret.storage == ArgValuetypeInReg) @@ -1215,9 +1538,40 @@ add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage } } -/* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode, - * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info - */ +static void +add_outarg_reg2 (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *tree) +{ + MonoInst *ins; + + switch (storage) { + case ArgInIReg: + MONO_INST_NEW (cfg, ins, OP_MOVE); + ins->dreg = mono_alloc_ireg (cfg); + ins->sreg1 = tree->dreg; + MONO_ADD_INS (cfg->cbb, ins); + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE); + break; + case ArgInFloatSSEReg: + MONO_INST_NEW (cfg, ins, OP_AMD64_SET_XMMREG_R4); + ins->dreg = mono_alloc_freg (cfg); + ins->sreg1 = tree->dreg; + MONO_ADD_INS (cfg->cbb, ins); + + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE); + break; + case ArgInDoubleSSEReg: + MONO_INST_NEW (cfg, ins, OP_FMOVE); + ins->dreg = mono_alloc_freg (cfg); + ins->sreg1 = tree->dreg; + MONO_ADD_INS (cfg->cbb, ins); + + mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE); + + break; + default: + g_assert_not_reached (); + } +} static int arg_storage_to_ldind (ArgStorage storage) @@ -1236,6 +1590,23 @@ arg_storage_to_ldind (ArgStorage storage) return -1; } +static int +arg_storage_to_load_membase (ArgStorage storage) +{ + switch (storage) { + case ArgInIReg: + return OP_LOAD_MEMBASE; + case ArgInDoubleSSEReg: + return OP_LOADR8_MEMBASE; + case ArgInFloatSSEReg: + return OP_LOADR4_MEMBASE; + default: + g_assert_not_reached (); + } + + return -1; +} + static void emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo) { @@ -1265,7 +1636,10 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo) MONO_INST_NEW (cfg, arg, OP_OUTARG); arg->inst_left = sig_arg; arg->type = STACK_PTR; - MONO_INST_LIST_ADD (&arg->node, &call->out_args); + + /* prepend, so they get reversed */ + arg->next = call->out_args; + call->out_args = arg; } /* @@ -1273,7 +1647,6 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo) * instructions to properly call the function in call. * This includes pushing, moving arguments to the right register * etc. - * Issue: who does the spilling if needed, and when? 
*/ MonoCallInst* mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) { @@ -1288,7 +1661,13 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, sig = call->signature; n = sig->param_count + sig->hasthis; - cinfo = get_call_info (cfg, cfg->mempool, sig, sig->pinvoke); + cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke); + + if (cfg->method->save_lmf) { + MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF); + arg->next = call->out_args; + call->out_args = arg; + } for (i = 0; i < n; ++i) { ainfo = cinfo->args + i; @@ -1307,11 +1686,16 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, arg->cil_code = in->cil_code; arg->inst_left = in; arg->type = in->type; + /* prepend, so they get reversed */ + arg->next = call->out_args; + call->out_args = arg; +#if 0 if (!cinfo->stack_usage) /* Keep the assignments to the arg registers in order if possible */ MONO_INST_LIST_ADD_TAIL (&arg->node, &call->out_args); else MONO_INST_LIST_ADD (&arg->node, &call->out_args); +#endif if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) { guint32 align; @@ -1380,7 +1764,9 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, MONO_INST_NEW (cfg, arg, OP_OUTARG); arg->cil_code = in->cil_code; arg->type = in->type; - MONO_INST_LIST_ADD (&arg->node, &call->out_args); + /* prepend, so they get reversed */ + arg->next = call->out_args; + call->out_args = arg; add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1], load); @@ -1392,10 +1778,51 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, arg->inst_right = in; arg->type = in->type; - MONO_INST_LIST_ADD (&arg->node, &call->out_args); + /* prepend, so they get reversed */ + arg->next = call->out_args; + call->out_args = arg; } } - else { + else if (ainfo->storage == ArgValuetypeAddrInIReg){ + + /* Add a temp variable to the method*/ + MonoInst *load; + MonoInst *vtaddr = mono_compile_create_var (cfg, &in->klass->byval_arg, OP_LOCAL); + + MONO_INST_NEW (cfg, load, OP_LDADDR); + load->ssa_op = MONO_SSA_LOAD; + load->inst_left = vtaddr; + + if (ainfo->pair_storage [0] == ArgInIReg) { + /* Inserted after the copy. Load the address of the temp to the argument regster.*/ + arg->opcode = OP_OUTARG_REG; + arg->inst_left = load; + arg->inst_call = call; + arg->backend.reg3 = ainfo->pair_regs [0]; + } + else { + /* Inserted after the copy. 
Load the address of the temp on the stack.*/ + arg->opcode = OP_OUTARG_VT; + arg->inst_left = load; + arg->type = STACK_PTR; + arg->klass = mono_defaults.int_class; + arg->backend.is_pinvoke = sig->pinvoke; + arg->inst_imm = size; + } + + /*Copy the argument to the temp variable.*/ + MONO_INST_NEW (cfg, load, OP_MEMCPY); + load->backend.memcpy_args = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoMemcpyArgs)); + load->backend.memcpy_args->size = size; + load->backend.memcpy_args->align = align; + load->inst_left = (cfg)->varinfo [vtaddr->inst_c0]; + load->inst_right = in->inst_i0; + + // FIXME: + g_assert_not_reached (); + //MONO_INST_LIST_ADD (&load->node, &call->out_args); + } + else { arg->opcode = OP_OUTARG_VT; arg->klass = in->klass; arg->backend.is_pinvoke = sig->pinvoke; @@ -1433,15 +1860,37 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, emit_sig_cookie (cfg, call, cinfo); } + if (cinfo->ret.storage == ArgValuetypeInReg) { + /* This is needed by mono_arch_emit_this_vret_args () */ + if (!cfg->arch.vret_addr_loc) { + cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + /* Prevent it from being register allocated or optimized away */ + ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE; + } + } + if (cinfo->need_stack_align) { MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK); - MONO_INST_LIST_ADD (&arg->node, &call->out_args); + arg->inst_c0 = 8; + /* prepend, so they get reversed */ + arg->next = call->out_args; + call->out_args = arg; } +#ifdef PLATFORM_WIN32 + /* Always reserve 32 bytes of stack space on Win64 */ + /*MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK); + arg->inst_c0 = 32; + MONO_INST_LIST_ADD_TAIL (&arg->node, &call->out_args);*/ + NOT_IMPLEMENTED; +#endif + +#if 0 if (cfg->method->save_lmf) { MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF); MONO_INST_LIST_ADD_TAIL (&arg->node, &call->out_args); } +#endif call->stack_usage = cinfo->stack_usage; cfg->param_area = MAX (cfg->param_area, call->stack_usage); @@ -1450,6 +1899,332 @@ mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, return call; } +static void +emit_sig_cookie2 (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo) +{ + MonoInst *arg; + MonoMethodSignature *tmp_sig; + MonoInst *sig_arg; + + if (call->tail_call) + NOT_IMPLEMENTED; + + /* FIXME: Add support for signature tokens to AOT */ + cfg->disable_aot = TRUE; + + g_assert (cinfo->sig_cookie.storage == ArgOnStack); + + /* + * mono_ArgIterator_Setup assumes the signature cookie is + * passed first and all the arguments which were before it are + * passed on the stack after the signature. So compensate by + * passing a different signature. 
+ */ + tmp_sig = mono_metadata_signature_dup (call->signature); + tmp_sig->param_count -= call->signature->sentinelpos; + tmp_sig->sentinelpos = 0; + memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*)); + + MONO_INST_NEW (cfg, sig_arg, OP_ICONST); + sig_arg->dreg = mono_alloc_ireg (cfg); + sig_arg->inst_p0 = tmp_sig; + MONO_ADD_INS (cfg->cbb, sig_arg); + + MONO_INST_NEW (cfg, arg, OP_X86_PUSH); + arg->sreg1 = sig_arg->dreg; + MONO_ADD_INS (cfg->cbb, arg); +} + +#define NEW_VARSTORE(cfg,dest,var,vartype,inst) do { \ + MONO_INST_NEW ((cfg), (dest), OP_MOVE); \ + (dest)->opcode = mono_type_to_regmove ((cfg), (vartype)); \ + (dest)->klass = (var)->klass; \ + (dest)->sreg1 = (inst)->dreg; \ + (dest)->dreg = (var)->dreg; \ + if ((dest)->opcode == OP_VMOVE) (dest)->klass = mono_class_from_mono_type ((vartype)); \ + } while (0) + +#define NEW_ARGSTORE(cfg,dest,num,inst) NEW_VARSTORE ((cfg), (dest), cfg->args [(num)], cfg->arg_types [(num)], (inst)) + +#define EMIT_NEW_ARGSTORE(cfg,dest,num,inst) do { NEW_ARGSTORE ((cfg), (dest), (num), (inst)); MONO_ADD_INS ((cfg)->cbb, (dest)); } while (0) + +void +mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) +{ + MonoInst *arg, *in; + MonoMethodSignature *sig; + int i, n, stack_size; + CallInfo *cinfo; + ArgInfo *ainfo; + + stack_size = 0; + + sig = call->signature; + n = sig->param_count + sig->hasthis; + + cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke); + + if (cinfo->need_stack_align) { + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8); + } + + /* + * Emit all parameters passed in registers in non-reverse order for better readability + * and to help the optimization in emit_prolog (). + */ + for (i = 0; i < n; ++i) { + ainfo = cinfo->args + i; + + in = call->args [i]; + + if (ainfo->storage == ArgInIReg) + add_outarg_reg2 (cfg, call, ainfo->storage, ainfo->reg, in); + } + + for (i = n - 1; i >= 0; --i) { + ainfo = cinfo->args + i; + + in = call->args [i]; + + switch (ainfo->storage) { + case ArgInIReg: + /* Already done */ + break; + case ArgInFloatSSEReg: + case ArgInDoubleSSEReg: + add_outarg_reg2 (cfg, call, ainfo->storage, ainfo->reg, in); + break; + case ArgOnStack: + case ArgValuetypeInReg: + case ArgValuetypeAddrInIReg: + if (ainfo->storage == ArgOnStack && call->tail_call) { + MonoInst *call_inst = (MonoInst*)call; + cfg->args [i]->flags |= MONO_INST_VOLATILE; + EMIT_NEW_ARGSTORE (cfg, call_inst, i, in); + } else if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) { + guint32 align; + guint32 size; + + if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) { + size = sizeof (MonoTypedRef); + align = sizeof (gpointer); + } + else { + if (sig->pinvoke) + size = mono_type_native_stack_size (&in->klass->byval_arg, &align); + else { + /* + * Other backends use mono_type_stack_size (), but that + * aligns the size to 8, which is larger than the size of + * the source, leading to reads of invalid memory if the + * source is at the end of address space. 
+ */ + size = mono_class_value_size (in->klass, &align); + } + } + g_assert (in->klass); + + if (size > 0) { + MONO_INST_NEW (cfg, arg, OP_OUTARG_VT); + arg->sreg1 = in->dreg; + arg->klass = in->klass; + arg->backend.size = size; + arg->inst_p0 = call; + arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo)); + memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo)); + + MONO_ADD_INS (cfg->cbb, arg); + } + } else { + MONO_INST_NEW (cfg, arg, OP_X86_PUSH); + arg->sreg1 = in->dreg; + if (!sig->params [i - sig->hasthis]->byref) { + if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) { + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8); + arg->opcode = OP_STORER4_MEMBASE_REG; + arg->inst_destbasereg = X86_ESP; + arg->inst_offset = 0; + } else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) { + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8); + arg->opcode = OP_STORER8_MEMBASE_REG; + arg->inst_destbasereg = X86_ESP; + arg->inst_offset = 0; + } + } + MONO_ADD_INS (cfg->cbb, arg); + } + break; + default: + g_assert_not_reached (); + } + + if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) { + /* Emit the signature cookie just before the implicit arguments */ + emit_sig_cookie2 (cfg, call, cinfo); + } + } + + /* Handle the case where there are no implicit arguments */ + if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos)) { + emit_sig_cookie2 (cfg, call, cinfo); + } + + if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) { + MonoInst *vtarg; + + if (cinfo->ret.storage == ArgValuetypeInReg) { + if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) { + /* + * Tell the JIT to use a more efficient calling convention: call using + * OP_CALL, compute the result location after the call, and save the + * result there. + */ + call->vret_in_reg = TRUE; + /* + * Nullify the instruction computing the vret addr to enable + * future optimizations. + */ + if (call->vret_var) + NULLIFY_INS (call->vret_var); + } else { + if (call->tail_call) + NOT_IMPLEMENTED; + /* + * The valuetype is in RAX:RDX after the call, need to be copied to + * the stack. Push the address here, so the call instruction can + * access it. 
+ */ + if (!cfg->arch.vret_addr_loc) { + cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL); + /* Prevent it from being register allocated or optimized away */ + ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE; + } + + MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg); + } + } + else { + MONO_INST_NEW (cfg, vtarg, OP_MOVE); + vtarg->sreg1 = call->vret_var->dreg; + vtarg->dreg = mono_alloc_preg (cfg); + MONO_ADD_INS (cfg->cbb, vtarg); + + mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE); + } + } + +#ifdef PLATFORM_WIN32 + if (call->inst.opcode != OP_JMP && OP_TAILCALL != call->inst.opcode) { + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 0x20); + } +#endif + + if (cfg->method->save_lmf) { + MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF); + MONO_ADD_INS (cfg->cbb, arg); + } + + call->stack_usage = cinfo->stack_usage; +} + +void +mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src) +{ + MonoInst *arg; + MonoCallInst *call = (MonoCallInst*)ins->inst_p0; + ArgInfo *ainfo = (ArgInfo*)ins->inst_p1; + int size = ins->backend.size; + + if (ainfo->storage == ArgValuetypeInReg) { + MonoInst *load; + int part; + + for (part = 0; part < 2; ++part) { + if (ainfo->pair_storage [part] == ArgNone) + continue; + + MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part])); + load->inst_basereg = src->dreg; + load->inst_offset = part * sizeof (gpointer); + + switch (ainfo->pair_storage [part]) { + case ArgInIReg: + load->dreg = mono_alloc_ireg (cfg); + break; + case ArgInDoubleSSEReg: + case ArgInFloatSSEReg: + load->dreg = mono_alloc_freg (cfg); + break; + default: + g_assert_not_reached (); + } + MONO_ADD_INS (cfg->cbb, load); + + add_outarg_reg2 (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load); + } + } else if (ainfo->storage == ArgValuetypeAddrInIReg) { + MonoInst *vtaddr, *load; + vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL); + + MONO_INST_NEW (cfg, load, OP_LDADDR); + load->inst_p0 = vtaddr; + vtaddr->flags |= MONO_INST_INDIRECT; + load->type = STACK_MP; + load->klass = vtaddr->klass; + load->dreg = mono_alloc_ireg (cfg); + MONO_ADD_INS (cfg->cbb, load); + mini_emit_memcpy2 (cfg, load->dreg, 0, src->dreg, 0, size, 4); + + if (ainfo->pair_storage [0] == ArgInIReg) { + MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE); + arg->dreg = mono_alloc_ireg (cfg); + arg->sreg1 = load->dreg; + arg->inst_imm = 0; + MONO_ADD_INS (cfg->cbb, arg); + mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE); + } else { + MONO_INST_NEW (cfg, arg, OP_X86_PUSH); + arg->sreg1 = load->dreg; + MONO_ADD_INS (cfg->cbb, arg); + } + } else { + if (size == 8) { + /* Can't use this for < 8 since it does an 8 byte memory load */ + MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE); + arg->inst_basereg = src->dreg; + arg->inst_offset = 0; + MONO_ADD_INS (cfg->cbb, arg); + } else if (size <= 40) { + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 8)); + mini_emit_memcpy2 (cfg, X86_ESP, 0, src->dreg, 0, size, 4); + } else { + MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ); + arg->inst_basereg = src->dreg; + arg->inst_offset = 0; + arg->inst_imm = size; + MONO_ADD_INS (cfg->cbb, arg); + } + } +} + +void +mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val) +{ + MonoType *ret = mini_type_get_underlying_type (NULL, 
mono_method_signature (method)->ret); + + if (!ret->byref) { + if (ret->type == MONO_TYPE_R4) { + MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg); + return; + } else if (ret->type == MONO_TYPE_R8) { + MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg); + return; + } + } + + MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg); +} + #define EMIT_COND_BRANCH(ins,cond,sign) \ if (ins->flags & MONO_INST_BRLABEL) { \ if (ins->inst_i0->inst_c0) { \ @@ -1504,7 +2279,7 @@ if (ins->flags & MONO_INST_BRLABEL) { \ static guint8* emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data) { - mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); + gboolean no_patch = FALSE; /* * FIXME: Add support for thunks @@ -1523,7 +2298,7 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe near_call = TRUE; if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) { - if (((MonoMethod*)data)->klass->image->assembly->aot_module) + if (((MonoMethod*)data)->klass->image->aot_module) /* The callee might be an AOT method */ near_call = FALSE; if (((MonoMethod*)data)->dynamic) @@ -1545,9 +2320,15 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe } } else { - if (mono_find_class_init_trampoline_by_addr (data)) + if (!cfg->new_ir && mono_find_class_init_trampoline_by_addr (data)) near_call = TRUE; - else { + else if (cfg->abs_patches && g_hash_table_lookup (cfg->abs_patches, data)) { + /* + * This is not really an optimization, but required because the + * generic class init trampolines use R11 to pass the vtable. + */ + near_call = TRUE; + } else { MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data); if (info) { if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && @@ -1555,6 +2336,7 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe /* A call to the wrapped function */ if ((((guint64)data) >> 32) == 0) near_call = TRUE; + no_patch = TRUE; } else if (info->func == info->wrapper) { /* No wrapper */ @@ -1567,8 +2349,10 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe near_call = TRUE; } } - else if ((((guint64)data) >> 32) == 0) + else if ((((guint64)data) >> 32) == 0) { near_call = TRUE; + no_patch = TRUE; + } } } @@ -1584,9 +2368,18 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe #endif if (near_call) { + /* + * Align the call displacement to an address divisible by 4 so it does + * not span cache lines. This is required for code patching to work on SMP + * systems. 
+ */ + if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0) + amd64_padding (code, 4 - ((guint32)(code + 1 - cfg->native_code) % 4)); + mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); amd64_call_code (code, 0); } else { + mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); amd64_set_reg_template (code, GP_SCRATCH_REG); amd64_call_reg (code, GP_SCRATCH_REG); } @@ -1596,11 +2389,19 @@ emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointe } static inline guint8* -emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data) +emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data, gboolean win64_adjust_stack) { - mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); - - return emit_call_body (cfg, code, patch_type, data); +#ifdef PLATFORM_WIN32 + if (win64_adjust_stack) + amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 32); +#endif + code = emit_call_body (cfg, code, patch_type, data); +#ifdef PLATFORM_WIN32 + if (win64_adjust_stack) + amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 32); +#endif + + return code; } static inline int @@ -1630,8 +2431,8 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) { MonoInst *ins, *n; - MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) { - MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list); + MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) { + MonoInst *last_ins = ins->prev; switch (ins->opcode) { case OP_ADD_IMM: @@ -1645,10 +2446,8 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) */ ins->opcode = OP_X86_LEA_MEMBASE; ins->inst_basereg = ins->sreg1; - /* Fall through */ } - else - break; + break; case OP_LXOR: case OP_IXOR: if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) { @@ -1660,8 +2459,7 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) * propagation). These instruction sequences are very common * in the initlocals bblock. 
*/ - for (ins2 = mono_inst_list_next (&ins->node, &bb->ins_list); ins2; - ins2 = mono_inst_list_next (&ins2->node, &bb->ins_list)) { + for (ins2 = ins->next; ins2; ins2 = ins2->next) { if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM) || (ins2->opcode == OP_STORE_MEMBASE_IMM)) && (ins2->inst_imm == 0)) { ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode); ins2->sreg1 = ins->dreg; @@ -1711,146 +2509,9 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) } break; - case OP_LOAD_MEMBASE: - case OP_LOADI4_MEMBASE: - /* - * Note: if reg1 = reg2 the load op is removed - * - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_MOVE reg1, reg2 - */ - if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG - || last_ins->opcode == OP_STORE_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - if (ins->dreg == last_ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } else { - //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); - ins->opcode = OP_MOVE; - ins->sreg1 = last_ins->sreg1; - } - - /* - * Note: reg1 must be different from the basereg in the second load - * Note: if reg1 = reg2 is equal then second load is removed - * - * OP_LOAD_MEMBASE offset(basereg), reg1 - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_LOAD_MEMBASE offset(basereg), reg1 - * OP_MOVE reg1, reg2 - */ - } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE - || last_ins->opcode == OP_LOAD_MEMBASE) && - ins->inst_basereg != last_ins->dreg && - ins->inst_basereg == last_ins->inst_basereg && - ins->inst_offset == last_ins->inst_offset) { - - if (ins->dreg == last_ins->dreg) { - MONO_DEL_INS (ins); - continue; - } else { - ins->opcode = OP_MOVE; - ins->sreg1 = last_ins->dreg; - } - - //g_assert_not_reached (); - -#if 0 - /* - * OP_STORE_MEMBASE_IMM imm, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg - * --> - * OP_STORE_MEMBASE_IMM imm, offset(basereg) - * OP_ICONST reg, imm - */ - } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM - || last_ins->opcode == OP_STORE_MEMBASE_IMM) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); - ins->opcode = OP_ICONST; - ins->inst_c0 = last_ins->inst_imm; - g_assert_not_reached (); // check this rule -#endif - } - break; - case OP_LOADI1_MEMBASE: - /* - * Note: if reg1 = reg2 the load op is removed - * - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_MOVE reg1, reg2 - */ - if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - if (ins->dreg == last_ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } else { - //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); - ins->opcode = OP_MOVE; - ins->sreg1 = last_ins->sreg1; - } - } - break; - case OP_LOADI2_MEMBASE: - /* - * Note: if reg1 = reg2 the load op is removed - * - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_MOVE reg1, reg2 - */ - if (last_ins && (last_ins->opcode 
== OP_STOREI2_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - if (ins->dreg == last_ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } else { - //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); - ins->opcode = OP_MOVE; - ins->sreg1 = last_ins->sreg1; - } - } - break; - case OP_MOVE: - case OP_FMOVE: - /* - * Removes: - * - * OP_MOVE reg, reg - */ - if (ins->dreg == ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } - /* - * Removes: - * - * OP_MOVE sreg, dreg - * OP_MOVE dreg, sreg - */ - if (last_ins && last_ins->opcode == OP_MOVE && - ins->sreg1 == last_ins->dreg && - ins->dreg == last_ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } - break; } + + mono_peephole_ins (bb, ins); } } @@ -1859,19 +2520,13 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) { MonoInst *ins, *n; - MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) { - MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list); - + MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) { switch (ins->opcode) { case OP_ICONST: case OP_I8CONST: { - MonoInst *next; - /* reg = 0 -> XOR (reg, reg) */ /* XOR sets cflags on x86, so we cant do it always */ - next = mono_inst_list_next (&ins->node, &bb->ins_list); - if (ins->inst_c0 == 0 && (!next || - (next && INST_IGNORES_CFLAGS (next->opcode)))) { + if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) { ins->opcode = OP_LXOR; ins->sreg1 = ins->dreg; ins->sreg2 = ins->dreg; @@ -1899,8 +2554,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) * propagation). These instruction sequences are very common * in the initlocals bblock. */ - for (ins2 = mono_inst_list_next (&ins->node, &bb->ins_list); ins2; - ins2 = mono_inst_list_next (&ins2->node, &bb->ins_list)) { + for (ins2 = ins->next; ins2; ins2 = ins2->next) { if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM) || (ins2->opcode == OP_STORE_MEMBASE_IMM)) && (ins2->inst_imm == 0)) { ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode); ins2->sreg1 = ins->dreg; @@ -1923,184 +2577,16 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1)) ins->opcode = OP_X86_DEC_REG; break; - case OP_MUL_IMM: - /* remove unnecessary multiplication with 1 */ - if (ins->inst_imm == 1) { - if (ins->dreg != ins->sreg1) { - ins->opcode = OP_MOVE; - } else { - MONO_DEL_INS (ins); - continue; - } - } - break; - case OP_COMPARE_IMM: - /* OP_COMPARE_IMM (reg, 0) - * --> - * OP_AMD64_TEST_NULL (reg) - */ - if (!ins->inst_imm) - ins->opcode = OP_AMD64_TEST_NULL; - break; - case OP_ICOMPARE_IMM: - if (!ins->inst_imm) - ins->opcode = OP_X86_TEST_NULL; - break; - case OP_AMD64_ICOMPARE_MEMBASE_IMM: - /* - * OP_STORE_MEMBASE_REG reg, offset(basereg) - * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm - * --> - * OP_STORE_MEMBASE_REG reg, offset(basereg) - * OP_COMPARE_IMM reg, imm - * - * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL - */ - if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - ins->opcode = OP_ICOMPARE_IMM; - ins->sreg1 = last_ins->sreg1; - - /* check if we can remove cmp reg,0 with test null */ - if (!ins->inst_imm) - ins->opcode = OP_X86_TEST_NULL; - } - - break; - case 
OP_LOAD_MEMBASE: - case OP_LOADI4_MEMBASE: - /* - * Note: if reg1 = reg2 the load op is removed - * - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_MOVE reg1, reg2 - */ - if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG - || last_ins->opcode == OP_STORE_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - if (ins->dreg == last_ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } else { - //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); - ins->opcode = OP_MOVE; - ins->sreg1 = last_ins->sreg1; - } - - /* - * Note: reg1 must be different from the basereg in the second load - * Note: if reg1 = reg2 is equal then second load is removed - * - * OP_LOAD_MEMBASE offset(basereg), reg1 - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_LOAD_MEMBASE offset(basereg), reg1 - * OP_MOVE reg1, reg2 - */ - } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE - || last_ins->opcode == OP_LOAD_MEMBASE) && - ins->inst_basereg != last_ins->dreg && - ins->inst_basereg == last_ins->inst_basereg && - ins->inst_offset == last_ins->inst_offset) { - - if (ins->dreg == last_ins->dreg) { - MONO_DEL_INS (ins); - continue; - } else { - ins->opcode = OP_MOVE; - ins->sreg1 = last_ins->dreg; - } - - //g_assert_not_reached (); - -#if 0 - /* - * OP_STORE_MEMBASE_IMM imm, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg - * --> - * OP_STORE_MEMBASE_IMM imm, offset(basereg) - * OP_ICONST reg, imm - */ - } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM - || last_ins->opcode == OP_STORE_MEMBASE_IMM) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); - ins->opcode = OP_ICONST; - ins->inst_c0 = last_ins->inst_imm; - g_assert_not_reached (); // check this rule -#endif - } - break; - case OP_LOADI1_MEMBASE: - case OP_LOADU1_MEMBASE: - /* - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * CONV_I1/U1 reg1, reg2 - */ - if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? OP_PCONV_TO_I1 : OP_PCONV_TO_U1; - ins->sreg1 = last_ins->sreg1; - } - break; - case OP_LOADI2_MEMBASE: - case OP_LOADU2_MEMBASE: - /* - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * OP_LOAD_MEMBASE offset(basereg), reg2 - * --> - * OP_STORE_MEMBASE_REG reg1, offset(basereg) - * CONV_I2/U2 reg1, reg2 - */ - if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) && - ins->inst_basereg == last_ins->inst_destbasereg && - ins->inst_offset == last_ins->inst_offset) { - ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? 
OP_PCONV_TO_I2 : OP_PCONV_TO_U2; - ins->sreg1 = last_ins->sreg1; - } - break; - case OP_MOVE: - case OP_FMOVE: - /* - * Removes: - * - * OP_MOVE reg, reg - */ - if (ins->dreg == ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } - /* - * Removes: - * - * OP_MOVE sreg, dreg - * OP_MOVE dreg, sreg - */ - if (last_ins && last_ins->opcode == OP_MOVE && - ins->sreg1 == last_ins->dreg && - ins->dreg == last_ins->sreg1) { - MONO_DEL_INS (ins); - continue; - } - break; } + + mono_peephole_ins (bb, ins); } } -#define NEW_INS(cfg,ins,dest,op) do { \ - (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst)); \ - (dest)->opcode = (op); \ - MONO_INST_LIST_ADD_TAIL (&(dest)->node, &(ins)->node); \ +#define NEW_INS(cfg,ins,dest,op) do { \ + MONO_INST_NEW ((cfg), (dest), (op)); \ + (dest)->cil_code = (ins)->cil_code; \ + mono_bblock_insert_before_ins (bb, ins, (dest)); \ } while (0) /* @@ -2122,26 +2608,29 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) * description can't model some parts of the composite instructions like * cdq. */ - MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) { + MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) { switch (ins->opcode) { case OP_DIV_IMM: case OP_REM_IMM: case OP_IDIV_IMM: - case OP_IREM_IMM: case OP_IDIV_UN_IMM: case OP_IREM_UN_IMM: - NEW_INS (cfg, ins, temp, OP_ICONST); - temp->inst_c0 = ins->inst_imm; - temp->dreg = mono_regstate_next_int (cfg->rs); - ins->opcode = mono_op_imm_to_op (ins->opcode); - ins->sreg2 = temp->dreg; + mono_decompose_op_imm (cfg, bb, ins); + break; + case OP_IREM_IMM: + /* Keep the opcode if we can implement it efficiently */ + if (!((ins->inst_imm > 0) && (mono_is_power_of_two (ins->inst_imm) != -1))) + mono_decompose_op_imm (cfg, bb, ins); break; case OP_COMPARE_IMM: case OP_LCOMPARE_IMM: if (!amd64_is_imm32 (ins->inst_imm)) { NEW_INS (cfg, ins, temp, OP_I8CONST); temp->inst_c0 = ins->inst_imm; - temp->dreg = mono_regstate_next_int (cfg->rs); + if (cfg->globalra) + temp->dreg = mono_alloc_ireg (cfg); + else + temp->dreg = mono_regstate_next_int (cfg->rs); ins->opcode = OP_COMPARE; ins->sreg2 = temp->dreg; } @@ -2151,7 +2640,10 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) if (!amd64_is_imm32 (ins->inst_offset)) { NEW_INS (cfg, ins, temp, OP_I8CONST); temp->inst_c0 = ins->inst_offset; - temp->dreg = mono_regstate_next_int (cfg->rs); + if (cfg->globalra) + temp->dreg = mono_alloc_ireg (cfg); + else + temp->dreg = mono_regstate_next_int (cfg->rs); ins->opcode = OP_AMD64_LOADI8_MEMINDEX; ins->inst_indexreg = temp->dreg; } @@ -2161,7 +2653,10 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) if (!amd64_is_imm32 (ins->inst_imm)) { NEW_INS (cfg, ins, temp, OP_I8CONST); temp->inst_c0 = ins->inst_imm; - temp->dreg = mono_regstate_next_int (cfg->rs); + if (cfg->globalra) + temp->dreg = mono_alloc_ireg (cfg); + else + temp->dreg = mono_regstate_next_int (cfg->rs); ins->opcode = OP_STOREI8_MEMBASE_REG; ins->sreg1 = temp->dreg; } @@ -2199,22 +2694,7 @@ cc_signed_table [] = { static unsigned char* emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed) { - if (use_sse2) { - amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg); - } - else { - amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16); - x86_fnstcw_membase(code, AMD64_RSP, 0); - amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2); - amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00); - amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2); - amd64_fldcw_membase (code, AMD64_RSP, 2); - 
amd64_push_reg (code, AMD64_RAX); // SP = SP - 8 - amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8); - amd64_pop_reg (code, dreg); - amd64_fldcw_membase (code, AMD64_RSP, 0); - amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16); - } + amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg); if (size == 1) amd64_widen_reg (code, dreg, dreg, is_signed, FALSE); @@ -2331,35 +2811,27 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) case OP_FCALL_REG: case OP_FCALL_MEMBASE: if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) { - if (use_sse2) - amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0); - else { - /* FIXME: optimize this */ - amd64_movss_membase_reg (code, AMD64_RSP, -8, AMD64_XMM0); - amd64_fld_membase (code, AMD64_RSP, -8, FALSE); - } + amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0); } else { - if (use_sse2) { - if (ins->dreg != AMD64_XMM0) - amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0); - } - else { - /* FIXME: optimize this */ - amd64_movsd_membase_reg (code, AMD64_RSP, -8, AMD64_XMM0); - amd64_fld_membase (code, AMD64_RSP, -8, TRUE); - } + if (ins->dreg != AMD64_XMM0) + amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0); } break; case OP_VCALL: case OP_VCALL_REG: case OP_VCALL_MEMBASE: - cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE); + case OP_VCALL2: + case OP_VCALL2_REG: + case OP_VCALL2_MEMBASE: + cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE); if (cinfo->ret.storage == ArgValuetypeInReg) { - /* Pop the destination address from the stack */ - amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8); - amd64_pop_reg (code, AMD64_RCX); - + MonoInst *loc = cfg->arch.vret_addr_loc; + + /* Load the destination address */ + g_assert (loc->opcode == OP_REGOFFSET); + amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, 8); + for (quad = 0; quad < 2; quad ++) { switch (cinfo->ret.pair_storage [quad]) { case ArgInIReg: @@ -2385,19 +2857,25 @@ emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) } /* - * emit_tls_get: + * mono_amd64_emit_tls_get: * @code: buffer to store code to * @dreg: hard register where to place the result * @tls_offset: offset info * - * emit_tls_get emits in @code the native code that puts in the dreg register - * the item in the thread local storage identified by tls_offset. + * mono_amd64_emit_tls_get emits in @code the native code that puts in + * the dreg register the item in the thread local storage identified + * by tls_offset. 
* * Returns: a pointer to the end of the stored code */ -static guint8* -emit_tls_get (guint8* code, int dreg, int tls_offset) +guint8* +mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset) { +#ifdef PLATFORM_WIN32 + g_assert (tls_offset < 64); + x86_prefix (code, X86_GS_PREFIX); + amd64_mov_reg_mem (code, dreg, (tls_offset * 8) + 0x1480, 8); +#else if (optimize_for_xen) { x86_prefix (code, X86_FS_PREFIX); amd64_mov_reg_mem (code, dreg, 0, 8); @@ -2406,6 +2884,7 @@ emit_tls_get (guint8* code, int dreg, int tls_offset) x86_prefix (code, X86_FS_PREFIX); amd64_mov_reg_mem (code, dreg, tls_offset, 8); } +#endif return code; } @@ -2478,6 +2957,10 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code) } } break; + case ArgValuetypeAddrInIReg: + if (ainfo->pair_storage [0] == ArgInIReg) + amd64_mov_reg_membase (code, ainfo->pair_regs [0], ins->inst_left->inst_basereg, ins->inst_left->inst_offset, sizeof (gpointer)); + break; default: break; } @@ -2511,6 +2994,8 @@ amd64_pop_reg (code, AMD64_RAX); #define LOOP_ALIGNMENT 8 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting) +#ifndef DISABLE_JIT + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -2518,6 +3003,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) MonoCallInst *call; guint offset; guint8 *code = cfg->native_code + cfg->code_len; + MonoInst *last_ins = NULL; guint last_offset = 0; int max_len, cpos; @@ -2553,6 +3039,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) mono_debug_open_block (cfg, bb, offset); + if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num) + x86_breakpoint (code); + MONO_BB_FOR_EACH_INS (bb, ins) { offset = code - cfg->native_code; @@ -2620,8 +3109,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0); break; case OP_LOADU4_MEM: - amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0); - amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4); + // FIXME: Decompose this earlier + if (cfg->new_ir) { + if (amd64_is_imm32 (ins->inst_imm)) + amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4); + else { + amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm); + amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4); + } + } else { + amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0); + amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4); + } break; case OP_LOADU1_MEM: amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm); @@ -2643,13 +3142,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4); break; case OP_LOADU1_MEMBASE: - amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE); + /* The cpu zero extends the result into 64 bits */ + amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE, 4); break; case OP_LOADI1_MEMBASE: amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE); break; case OP_LOADU2_MEMBASE: - amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE); + /* The cpu zero extends the result into 64 bits */ + amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE, 4); break; case OP_LOADI2_MEMBASE: amd64_widen_membase (code, ins->dreg, ins->inst_basereg, 
ins->inst_offset, TRUE, TRUE); @@ -2765,6 +3266,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_dec_reg_size (code, ins->dreg, 4); break; case OP_X86_MUL_REG_MEMBASE: + case OP_X86_MUL_MEMBASE_REG: amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4); break; case OP_AMD64_ICOMPARE_MEMBASE_REG: @@ -2846,6 +3348,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_BREAK: amd64_breakpoint (code); break; + case OP_RELAXED_NOP: + x86_prefix (code, X86_REP_PREFIX); + x86_nop (code); + break; + case OP_HARD_NOP: + x86_nop (code); + break; case OP_NOP: case OP_DUMMY_USE: case OP_DUMMY_STORE: @@ -3003,6 +3512,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_div_reg_size (code, ins->sreg2, FALSE, 4); } break; + case OP_IREM_IMM: { + int power = mono_is_power_of_two (ins->inst_imm); + + g_assert (ins->sreg1 == X86_EAX); + g_assert (ins->dreg == X86_EAX); + g_assert (power >= 0); + + if (power == 0) { + amd64_mov_reg_imm (code, ins->dreg, 0); + break; + } + + /* Based on gcc code */ + + /* Add compensation for negative dividents */ + amd64_mov_reg_reg_size (code, AMD64_RDX, AMD64_RAX, 4); + if (power > 1) + amd64_shift_reg_imm_size (code, X86_SAR, AMD64_RDX, 31, 4); + amd64_shift_reg_imm_size (code, X86_SHR, AMD64_RDX, 32 - power, 4); + amd64_alu_reg_reg_size (code, X86_ADD, AMD64_RAX, AMD64_RDX, 4); + /* Compute remainder */ + amd64_alu_reg_imm_size (code, X86_AND, AMD64_RAX, (1 << power) - 1, 4); + /* Remove compensation */ + amd64_alu_reg_reg_size (code, X86_SUB, AMD64_RAX, AMD64_RDX, 4); + break; + } case OP_LMUL_OVF: amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2); EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException"); @@ -3207,6 +3742,31 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]); break; + case OP_CMOV_IEQ: + case OP_CMOV_IGE: + case OP_CMOV_IGT: + case OP_CMOV_ILE: + case OP_CMOV_ILT: + case OP_CMOV_INE_UN: + case OP_CMOV_IGE_UN: + case OP_CMOV_IGT_UN: + case OP_CMOV_ILE_UN: + case OP_CMOV_ILT_UN: + case OP_CMOV_LEQ: + case OP_CMOV_LGE: + case OP_CMOV_LGT: + case OP_CMOV_LLE: + case OP_CMOV_LLT: + case OP_CMOV_LNE_UN: + case OP_CMOV_LGE_UN: + case OP_CMOV_LGT_UN: + case OP_CMOV_LLE_UN: + case OP_CMOV_LLT_UN: + g_assert (ins->dreg == ins->sreg1); + /* This needs to operate on 64 bit values */ + amd64_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2); + break; + case OP_LNOT: amd64_not_reg (code, ins->sreg1); break; @@ -3225,33 +3785,24 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8); break; + case OP_JUMP_TABLE: + mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); + amd64_mov_reg_imm_size (code, ins->dreg, 0, 8); + break; case OP_MOVE: amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer)); break; case OP_AMD64_SET_XMMREG_R4: { - if (use_sse2) { - amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1); - } - else { - amd64_fst_membase (code, AMD64_RSP, -8, FALSE, TRUE); - /* ins->dreg is set to -1 by the reg allocator */ - amd64_movss_reg_membase (code, ins->backend.reg3, AMD64_RSP, -8); - } + amd64_sse_cvtsd2ss_reg_reg (code, 
ins->dreg, ins->sreg1); break; } case OP_AMD64_SET_XMMREG_R8: { - if (use_sse2) { - if (ins->dreg != ins->sreg1) - amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1); - } - else { - amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); - /* ins->dreg is set to -1 by the reg allocator */ - amd64_movsd_reg_membase (code, ins->backend.reg3, AMD64_RSP, -8); - } + if (ins->dreg != ins->sreg1) + amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1); break; } - case OP_JMP: { + case OP_JMP: + case OP_TAILCALL: { /* * Note: this 'frame destruction' logic is useful for tail calls, too. * Keep in sync with the code in emit_epilog. @@ -3264,7 +3815,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) g_assert (!cfg->method->save_lmf); - code = emit_load_volatile_arguments (cfg, code); + if (ins->opcode == OP_JMP) + code = emit_load_volatile_arguments (cfg, code); if (cfg->arch.omit_fp) { guint32 save_offset = 0; @@ -3311,11 +3863,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8); break; } + case OP_CALL: case OP_FCALL: case OP_LCALL: case OP_VCALL: + case OP_VCALL2: case OP_VOIDCALL: - case OP_CALL: call = (MonoCallInst*)ins; /* * The AMD64 ABI forces callers to know about varargs. @@ -3340,9 +3893,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } if (ins->flags & MONO_INST_HAS_METHOD) - code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method); + code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE); else - code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr); + code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE); if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage); code = emit_move_return_value (cfg, ins, code); @@ -3350,6 +3903,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_FCALL_REG: case OP_LCALL_REG: case OP_VCALL_REG: + case OP_VCALL2_REG: case OP_VOIDCALL_REG: case OP_CALL_REG: call = (MonoCallInst*)ins; @@ -3368,7 +3922,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ins->sreg1 = AMD64_R11; } amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX); + } else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) { + /* + * Since the unmanaged calling convention doesn't contain a + * 'vararg' entry, we have to treat every pinvoke call as a + * potential vararg call. 
+ */ + guint32 nregs, i; + nregs = 0; + for (i = 0; i < AMD64_XMM_NREG; ++i) + if (call->used_fregs & (1 << i)) + nregs ++; + if (ins->sreg1 == AMD64_RAX) { + amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8); + ins->sreg1 = AMD64_R11; + } + if (!nregs) + amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX); + else + amd64_mov_reg_imm (code, AMD64_RAX, nregs); } + amd64_call_reg (code, ins->sreg1); if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage); @@ -3377,6 +3951,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_FCALL_MEMBASE: case OP_LCALL_MEMBASE: case OP_VCALL_MEMBASE: + case OP_VCALL2_MEMBASE: case OP_VOIDCALL_MEMBASE: case OP_CALL_MEMBASE: call = (MonoCallInst*)ins; @@ -3390,6 +3965,31 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ins->sreg1 = AMD64_RAX; } + if (call->method && ins->inst_offset < 0) { + gssize val; + + /* + * This is a possible IMT call so save the IMT method in the proper + * register. We don't use the generic code in method-to-ir.c, because + * we need to disassemble this in get_vcall_slot_addr (), so we have to + * maintain control over the layout of the code. + * Also put the base reg in %rax to simplify find_imt_method (). + */ + if (ins->sreg1 != AMD64_RAX) { + amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8); + ins->sreg1 = AMD64_RAX; + } + val = (gssize)(gpointer)call->method; + + // FIXME: Generics sharing +#if 0 + if ((((guint64)val) >> 32) == 0) + amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_REG, val, 4); + else + amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_REG, val, 8); +#endif + } + amd64_call_membase (code, ins->sreg1, ins->inst_offset); if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage); @@ -3409,8 +4009,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_X86_PUSH_MEMBASE: amd64_push_membase (code, ins->inst_basereg, ins->inst_offset); break; - case OP_X86_PUSH_OBJ: - amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm); + case OP_X86_PUSH_OBJ: { + int size = ALIGN_TO (ins->inst_imm, 8); + amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size); amd64_push_reg (code, AMD64_RDI); amd64_push_reg (code, AMD64_RSI); amd64_push_reg (code, AMD64_RCX); @@ -3418,8 +4019,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset); else amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8); - amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8); - amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3)); + amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8)); + amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3)); amd64_cld (code); amd64_prefix (code, X86_REP_PREFIX); amd64_movsd (code); @@ -3427,6 +4028,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_pop_reg (code, AMD64_RSI); amd64_pop_reg (code, AMD64_RDI); break; + } case OP_X86_LEA: amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount); break; @@ -3448,12 +4050,22 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1); if (ins->flags & MONO_INST_INIT) { - /* FIXME: Optimize this */ - amd64_mov_reg_imm (code, ins->dreg, size); - ins->sreg1 = ins->dreg; + if (size < 
64) { + int i; - code = mono_emit_stack_alloc (code, ins); - amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8); + amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size); + amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); + + for (i = 0; i < size; i += 8) + amd64_mov_membase_reg (code, AMD64_RSP, i, ins->dreg, 8); + amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8); + } else { + amd64_mov_reg_imm (code, ins->dreg, size); + ins->sreg1 = ins->dreg; + + code = mono_emit_stack_alloc (code, ins); + amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8); + } } else { amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size); amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8); @@ -3463,13 +4075,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_THROW: { amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8); code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, - (gpointer)"mono_arch_throw_exception"); + (gpointer)"mono_arch_throw_exception", FALSE); break; } case OP_RETHROW: { amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8); code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, - (gpointer)"mono_arch_rethrow_exception"); + (gpointer)"mono_arch_rethrow_exception", FALSE); break; } case OP_CALL_HANDLER: @@ -3503,6 +4115,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ins->inst_c0 = code - cfg->native_code; break; case OP_BR: + //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins); + //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins) + //break; if (ins->flags & MONO_INST_BRLABEL) { if (ins->inst_i0->inst_c0) { amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0); @@ -3589,112 +4204,53 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_R8CONST: { double d = *(double *)ins->inst_p0; - if (use_sse2) { - if ((d == 0.0) && (mono_signbit (d) == 0)) { - amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg); - } - else { - mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0); - amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0); - } + if ((d == 0.0) && (mono_signbit (d) == 0)) { + amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg); } - else if ((d == 0.0) && (mono_signbit (d) == 0)) { - amd64_fldz (code); - } else if (d == 1.0) { - x86_fld1 (code); - } else { + else { mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0); - amd64_fld_membase (code, AMD64_RIP, 0, TRUE); + amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0); } break; } case OP_R4CONST: { float f = *(float *)ins->inst_p0; - if (use_sse2) { - if ((f == 0.0) && (mono_signbit (f) == 0)) { - amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg); - } - else { - mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0); - amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0); - amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg); - } + if ((f == 0.0) && (mono_signbit (f) == 0)) { + amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg); } - else if ((f == 0.0) && (mono_signbit (f) == 0)) { - amd64_fldz (code); - } else if (f == 1.0) { - x86_fld1 (code); - } else { + else { mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0); - amd64_fld_membase (code, AMD64_RIP, 0, FALSE); + amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0); + amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg); } break; } case OP_STORER8_MEMBASE_REG: - if (use_sse2) - amd64_sse_movsd_membase_reg (code, 
ins->inst_destbasereg, ins->inst_offset, ins->sreg1); - else - amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE); + amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1); break; case OP_LOADR8_SPILL_MEMBASE: - if (use_sse2) - g_assert_not_reached (); - amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE); - amd64_fxch (code, 1); + g_assert_not_reached (); break; case OP_LOADR8_MEMBASE: - if (use_sse2) - amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset); - else - amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE); + amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset); break; case OP_STORER4_MEMBASE_REG: - if (use_sse2) { - /* This requires a double->single conversion */ - amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1); - amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15); - } - else - amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE); + /* This requires a double->single conversion */ + amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1); + amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15); break; case OP_LOADR4_MEMBASE: - if (use_sse2) { - amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset); - amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg); - } - else - amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE); + amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset); + amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg); break; case OP_ICONV_TO_R4: /* FIXME: change precision */ case OP_ICONV_TO_R8: - if (use_sse2) - amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4); - else { - amd64_push_reg (code, ins->sreg1); - amd64_fild_membase (code, AMD64_RSP, 0, FALSE); - amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8); - } + amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4); break; case OP_LCONV_TO_R4: /* FIXME: change precision */ case OP_LCONV_TO_R8: - if (use_sse2) - amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1); - else { - amd64_push_reg (code, ins->sreg1); - amd64_fild_membase (code, AMD64_RSP, 0, TRUE); - amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8); - } - break; - case OP_X86_FP_LOAD_I8: - if (use_sse2) - g_assert_not_reached (); - amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE); - break; - case OP_X86_FP_LOAD_I4: - if (use_sse2) - g_assert_not_reached (); - amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE); + amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1); break; case OP_FCONV_TO_R4: /* FIXME: nothing to do ?? 
*/ @@ -3722,61 +4278,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE); break; case OP_LCONV_TO_R_UN: { - static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 }; guint8 *br [2]; - if (use_sse2) { - /* Based on gcc code */ - amd64_test_reg_reg (code, ins->sreg1, ins->sreg1); - br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE); - - /* Positive case */ - amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1); - br [1] = code; x86_jump8 (code, 0); - amd64_patch (br [0], code); - - /* Negative case */ - /* Save to the red zone */ - amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8); - amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8); - amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8); - amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8); - amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1); - amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1); - amd64_alu_reg_imm (code, X86_OR, AMD64_RAX, AMD64_RCX); - amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX); - amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg); - /* Restore */ - amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8); - amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8); - amd64_patch (br [1], code); - - break; - } - - /* load 64bit integer to FP stack */ - amd64_push_imm (code, 0); - amd64_push_reg (code, ins->sreg2); - amd64_push_reg (code, ins->sreg1); - amd64_fild_membase (code, AMD64_RSP, 0, TRUE); - /* store as 80bit FP value */ - x86_fst80_membase (code, AMD64_RSP, 0); - - /* test if lreg is negative */ - amd64_test_reg_reg (code, ins->sreg2, ins->sreg2); - br [0] = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE); - - /* add correction constant mn */ - x86_fld80_mem (code, (gssize)mn); - x86_fld80_membase (code, AMD64_RSP, 0); - amd64_fp_op_reg (code, X86_FADD, 1, TRUE); - x86_fst80_membase (code, AMD64_RSP, 0); + /* Based on gcc code */ + amd64_test_reg_reg (code, ins->sreg1, ins->sreg1); + br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE); + /* Positive case */ + amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1); + br [1] = code; x86_jump8 (code, 0); amd64_patch (br [0], code); - x86_fld80_membase (code, AMD64_RSP, 0); - amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 12); - + /* Negative case */ + /* Save to the red zone */ + amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8); + amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8); + amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8); + amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8); + amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1); + amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1); + amd64_alu_reg_imm (code, X86_OR, AMD64_RAX, AMD64_RCX); + amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX); + amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg); + /* Restore */ + amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8); + amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8); + amd64_patch (br [1], code); break; } case OP_LCONV_TO_OVF_U4: @@ -3790,123 +4317,47 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8); break; case OP_FMOVE: - if (use_sse2 && (ins->dreg != ins->sreg1)) + if (ins->dreg != ins->sreg1) amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1); break; case OP_FADD: - if (use_sse2) - amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2); - else - amd64_fp_op_reg (code, X86_FADD, 1, TRUE); + 
amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2); break; case OP_FSUB: - if (use_sse2) - amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2); - else - amd64_fp_op_reg (code, X86_FSUB, 1, TRUE); + amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2); break; case OP_FMUL: - if (use_sse2) - amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2); - else - amd64_fp_op_reg (code, X86_FMUL, 1, TRUE); + amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2); break; case OP_FDIV: - if (use_sse2) - amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2); - else - amd64_fp_op_reg (code, X86_FDIV, 1, TRUE); + amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2); break; - case OP_FNEG: - if (use_sse2) { - static double r8_0 = -0.0; + case OP_FNEG: { + static double r8_0 = -0.0; - g_assert (ins->sreg1 == ins->dreg); + g_assert (ins->sreg1 == ins->dreg); - mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0); - amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0); - } - else - amd64_fchs (code); - break; + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0); + amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0); + break; + } case OP_SIN: - if (use_sse2) { - EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1); - } - else { - amd64_fsin (code); - amd64_fldz (code); - amd64_fp_op_reg (code, X86_FADD, 1, TRUE); - } + EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1); break; case OP_COS: - if (use_sse2) { - EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1); - } - else { - amd64_fcos (code); - amd64_fldz (code); - amd64_fp_op_reg (code, X86_FADD, 1, TRUE); - } + EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1); break; - case OP_ABS: - if (use_sse2) { - EMIT_SSE2_FPFUNC (code, fabs, ins->dreg, ins->sreg1); - } - else - amd64_fabs (code); + case OP_ABS: { + static guint64 d = 0x7fffffffffffffffUL; + + g_assert (ins->sreg1 == ins->dreg); + + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &d); + amd64_sse_andpd_reg_membase (code, ins->dreg, AMD64_RIP, 0); break; - case OP_TAN: { - /* - * it really doesn't make sense to inline all this code, - * it's here just to show that things may not be as simple - * as they appear. 
- */ - guchar *check_pos, *end_tan, *pop_jump; - if (use_sse2) - g_assert_not_reached (); - amd64_push_reg (code, AMD64_RAX); - amd64_fptan (code); - amd64_fnstsw (code); - amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2); - check_pos = code; - x86_branch8 (code, X86_CC_NE, 0, FALSE); - amd64_fstp (code, 0); /* pop the 1.0 */ - end_tan = code; - x86_jump8 (code, 0); - amd64_fldpi (code); - amd64_fp_op (code, X86_FADD, 0); - amd64_fxch (code, 1); - x86_fprem1 (code); - amd64_fstsw (code); - amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2); - pop_jump = code; - x86_branch8 (code, X86_CC_NE, 0, FALSE); - amd64_fstp (code, 1); - amd64_fptan (code); - amd64_patch (pop_jump, code); - amd64_fstp (code, 0); /* pop the 1.0 */ - amd64_patch (check_pos, code); - amd64_patch (end_tan, code); - amd64_fldz (code); - amd64_fp_op_reg (code, X86_FADD, 1, TRUE); - amd64_pop_reg (code, AMD64_RAX); - break; } - case OP_ATAN: - if (use_sse2) - g_assert_not_reached (); - x86_fld1 (code); - amd64_fpatan (code); - amd64_fldz (code); - amd64_fp_op_reg (code, X86_FADD, 1, TRUE); - break; case OP_SQRT: - if (use_sse2) { - EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1); - } - else - amd64_fsqrt (code); + EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1); break; case OP_IMIN: g_assert (cfg->opt & MONO_OPT_CMOV); @@ -3914,201 +4365,109 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4); amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4); break; + case OP_IMIN_UN: + g_assert (cfg->opt & MONO_OPT_CMOV); + g_assert (ins->dreg == ins->sreg1); + amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4); + amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4); + break; case OP_IMAX: g_assert (cfg->opt & MONO_OPT_CMOV); g_assert (ins->dreg == ins->sreg1); amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4); amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4); break; + case OP_IMAX_UN: + g_assert (cfg->opt & MONO_OPT_CMOV); + g_assert (ins->dreg == ins->sreg1); + amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4); + amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4); + break; case OP_LMIN: g_assert (cfg->opt & MONO_OPT_CMOV); g_assert (ins->dreg == ins->sreg1); amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2); amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2); break; + case OP_LMIN_UN: + g_assert (cfg->opt & MONO_OPT_CMOV); + g_assert (ins->dreg == ins->sreg1); + amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2); + amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2); + break; case OP_LMAX: g_assert (cfg->opt & MONO_OPT_CMOV); g_assert (ins->dreg == ins->sreg1); amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2); amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2); + break; + case OP_LMAX_UN: + g_assert (cfg->opt & MONO_OPT_CMOV); + g_assert (ins->dreg == ins->sreg1); + amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2); + amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2); break; case OP_X86_FPOP: - if (!use_sse2) - amd64_fstp (code, 0); break; - case OP_FREM: { - guint8 *l1, *l2; - - if (use_sse2) - g_assert_not_reached (); - amd64_push_reg (code, AMD64_RAX); - /* we need to exchange ST(0) with ST(1) */ - amd64_fxch (code, 1); - - /* this requires a loop, because fprem somtimes - * returns a partial remainder */ - l1 = code; - /* looks like MS is 
using fprem instead of the IEEE compatible fprem1 */ - /* x86_fprem1 (code); */ - amd64_fprem (code); - amd64_fnstsw (code); - amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2); - l2 = code + 2; - x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE); - - /* pop result */ - amd64_fstp (code, 1); - - amd64_pop_reg (code, AMD64_RAX); - break; - } case OP_FCOMPARE: - if (use_sse2) { - /* - * The two arguments are swapped because the fbranch instructions - * depend on this for the non-sse case to work. - */ - amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1); - break; - } - if (cfg->opt & MONO_OPT_FCMOV) { - amd64_fcomip (code, 1); - amd64_fstp (code, 0); - break; - } - /* this overwrites EAX */ - EMIT_FPCOMPARE(code); - amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK); - break; - case OP_FCEQ: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - /* zeroing the register at the start results in - * shorter and faster code (we can also remove the widening op) - */ - guchar *unordered_check; - amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); - - if (use_sse2) - amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2); - else { - amd64_fcomip (code, 1); - amd64_fstp (code, 0); - } - unordered_check = code; - x86_branch8 (code, X86_CC_P, 0, FALSE); - amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE); - amd64_patch (unordered_check, code); - break; - } - if (ins->dreg != AMD64_RAX) - amd64_push_reg (code, AMD64_RAX); - - EMIT_FPCOMPARE(code); - amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000); - amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); - - if (ins->dreg != AMD64_RAX) - amd64_pop_reg (code, AMD64_RAX); + /* + * The two arguments are swapped because the fbranch instructions + * depend on this for the non-sse case to work. 
+ */ + amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1); + break; + case OP_FCEQ: { + /* zeroing the register at the start results in + * shorter and faster code (we can also remove the widening op) + */ + guchar *unordered_check; + amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); + amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2); + unordered_check = code; + x86_branch8 (code, X86_CC_P, 0, FALSE); + amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE); + amd64_patch (unordered_check, code); break; + } case OP_FCLT: case OP_FCLT_UN: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - /* zeroing the register at the start results in - * shorter and faster code (we can also remove the widening op) - */ - amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); - if (use_sse2) - amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1); - else { - amd64_fcomip (code, 1); - amd64_fstp (code, 0); - } - if (ins->opcode == OP_FCLT_UN) { - guchar *unordered_check = code; - guchar *jump_to_end; - x86_branch8 (code, X86_CC_P, 0, FALSE); - amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE); - jump_to_end = code; - x86_jump8 (code, 0); - amd64_patch (unordered_check, code); - amd64_inc_reg (code, ins->dreg); - amd64_patch (jump_to_end, code); - } else { - amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE); - } - break; - } - if (ins->dreg != AMD64_RAX) - amd64_push_reg (code, AMD64_RAX); - - EMIT_FPCOMPARE(code); - amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK); + /* zeroing the register at the start results in + * shorter and faster code (we can also remove the widening op) + */ + amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); + amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1); if (ins->opcode == OP_FCLT_UN) { - guchar *is_not_zero_check, *end_jump; - is_not_zero_check = code; - x86_branch8 (code, X86_CC_NZ, 0, TRUE); - end_jump = code; + guchar *unordered_check = code; + guchar *jump_to_end; + x86_branch8 (code, X86_CC_P, 0, FALSE); + amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE); + jump_to_end = code; x86_jump8 (code, 0); - amd64_patch (is_not_zero_check, code); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK); - - amd64_patch (end_jump, code); + amd64_patch (unordered_check, code); + amd64_inc_reg (code, ins->dreg); + amd64_patch (jump_to_end, code); + } else { + amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE); } - amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); - - if (ins->dreg != AMD64_RAX) - amd64_pop_reg (code, AMD64_RAX); break; case OP_FCGT: - case OP_FCGT_UN: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - /* zeroing the register at the start results in - * shorter and faster code (we can also remove the widening op) - */ - guchar *unordered_check; - amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); - if (use_sse2) - amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1); - else { - amd64_fcomip (code, 1); - amd64_fstp (code, 0); - } - if (ins->opcode == OP_FCGT) { - unordered_check = code; - x86_branch8 (code, X86_CC_P, 0, FALSE); - amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE); - amd64_patch (unordered_check, code); - } else { - amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE); - } - break; - } - if (ins->dreg != AMD64_RAX) - amd64_push_reg (code, AMD64_RAX); - - EMIT_FPCOMPARE(code); - amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0); - if (ins->opcode == OP_FCGT_UN) { - 
guchar *is_not_zero_check, *end_jump; - is_not_zero_check = code; - x86_branch8 (code, X86_CC_NZ, 0, TRUE); - end_jump = code; - x86_jump8 (code, 0); - amd64_patch (is_not_zero_check, code); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK); - - amd64_patch (end_jump, code); + case OP_FCGT_UN: { + /* zeroing the register at the start results in + * shorter and faster code (we can also remove the widening op) + */ + guchar *unordered_check; + amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); + amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1); + if (ins->opcode == OP_FCGT) { + unordered_check = code; + x86_branch8 (code, X86_CC_P, 0, FALSE); + amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE); + amd64_patch (unordered_check, code); + } else { + amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE); } - amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); - - if (ins->dreg != AMD64_RAX) - amd64_pop_reg (code, AMD64_RAX); break; + } case OP_FCLT_MEMBASE: case OP_FCGT_MEMBASE: case OP_FCLT_UN_MEMBASE: @@ -4116,7 +4475,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_FCEQ_MEMBASE: { guchar *unordered_check, *jump_to_end; int x86_cond; - g_assert (use_sse2); amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset); @@ -4160,162 +4518,94 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; } - case OP_FBEQ: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - guchar *jump = code; - x86_branch8 (code, X86_CC_P, 0, TRUE); - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); - amd64_patch (jump, code); - break; - } - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000); - EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE); + case OP_FBEQ: { + guchar *jump = code; + x86_branch8 (code, X86_CC_P, 0, TRUE); + EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); + amd64_patch (jump, code); break; + } case OP_FBNE_UN: /* Branch if C013 != 100 */ - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - /* branch if !ZF or (PF|CF) */ - EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); - EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); - EMIT_COND_BRANCH (ins, X86_CC_B, FALSE); - break; - } - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3); + /* branch if !ZF or (PF|CF) */ EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_B, FALSE); break; case OP_FBLT: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE); - break; - } - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE); break; case OP_FBLT_UN: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); - EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE); - break; - } - if (ins->opcode == OP_FBLT_UN) { - guchar *is_not_zero_check, *end_jump; - is_not_zero_check = code; - x86_branch8 (code, X86_CC_NZ, 0, TRUE); - end_jump = code; - x86_jump8 (code, 0); - amd64_patch (is_not_zero_check, code); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK); - - amd64_patch (end_jump, code); - } - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE); break; case OP_FBGT: case OP_FBGT_UN: - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - if (ins->opcode == OP_FBGT) { - guchar *br1; - - /* skip branch if C1=1 */ - br1 = code; - x86_branch8 (code, X86_CC_P, 0, 
FALSE); - /* branch if (C0 | C3) = 1 */ - EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE); - amd64_patch (br1, code); - break; - } else { - EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE); - } - break; - } - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0); - if (ins->opcode == OP_FBGT_UN) { - guchar *is_not_zero_check, *end_jump; - is_not_zero_check = code; - x86_branch8 (code, X86_CC_NZ, 0, TRUE); - end_jump = code; - x86_jump8 (code, 0); - amd64_patch (is_not_zero_check, code); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK); - - amd64_patch (end_jump, code); - } - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); - break; - case OP_FBGE: - /* Branch if C013 == 100 or 001 */ - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { + if (ins->opcode == OP_FBGT) { guchar *br1; /* skip branch if C1=1 */ br1 = code; x86_branch8 (code, X86_CC_P, 0, FALSE); /* branch if (C0 | C3) = 1 */ - EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE); amd64_patch (br1, code); break; + } else { + EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE); } - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0); - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3); - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); break; + case OP_FBGE: { + /* Branch if C013 == 100 or 001 */ + guchar *br1; + + /* skip branch if C1=1 */ + br1 = code; + x86_branch8 (code, X86_CC_P, 0, FALSE); + /* branch if (C0 | C3) = 1 */ + EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE); + amd64_patch (br1, code); + break; + } case OP_FBGE_UN: /* Branch if C013 == 000 */ - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE); - break; - } - EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE); break; - case OP_FBLE: + case OP_FBLE: { /* Branch if C013=000 or 100 */ - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - guchar *br1; + guchar *br1; - /* skip branch if C1=1 */ - br1 = code; - x86_branch8 (code, X86_CC_P, 0, FALSE); - /* branch if C0=0 */ - EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE); - amd64_patch (br1, code); - break; - } - amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1)); - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0); - EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); + /* skip branch if C1=1 */ + br1 = code; + x86_branch8 (code, X86_CC_P, 0, FALSE); + /* branch if C0=0 */ + EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE); + amd64_patch (br1, code); break; + } case OP_FBLE_UN: /* Branch if C013 != 001 */ - if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) { - EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); - EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE); - break; - } - amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0); - EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); + EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE); break; case OP_CKFINITE: - if (use_sse2) { - /* Transfer value to the fp stack */ - amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16); - amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1); - amd64_fld_membase (code, AMD64_RSP, 0, TRUE); - } + /* Transfer value to the fp stack */ + amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16); + amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1); + amd64_fld_membase (code, AMD64_RSP, 0, TRUE); + amd64_push_reg (code, AMD64_RAX); amd64_fxam (code); amd64_fnstsw (code); amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100); amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0); amd64_pop_reg (code, AMD64_RAX); - if (use_sse2) { 
- amd64_fstp (code, 0); - } + amd64_fstp (code, 0); EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException"); - if (use_sse2) - amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16); + amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16); break; case OP_TLS_GET: { - code = emit_tls_get (code, ins->dreg, ins->inst_offset); + code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset); break; } case OP_MEMORY_BARRIER: { @@ -4361,14 +4651,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; } case OP_ATOMIC_EXCHANGE_I4: - case OP_ATOMIC_EXCHANGE_I8: { + case OP_ATOMIC_EXCHANGE_I8: + case OP_ATOMIC_CAS_IMM_I4: { guchar *br[2]; int sreg2 = ins->sreg2; int breg = ins->inst_basereg; - guint32 size = (ins->opcode == OP_ATOMIC_EXCHANGE_I4) ? 4 : 8; + guint32 size; + gboolean need_push = FALSE, rdx_pushed = FALSE; + + if (ins->opcode == OP_ATOMIC_EXCHANGE_I8) + size = 8; + else + size = 4; /* - * See http://msdn.microsoft.com/msdnmag/issues/0700/Win32/ for + * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for * an explanation of how this works. */ @@ -4376,35 +4673,50 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) * hack to overcome limits in x86 reg allocator * (req: dreg == eax and sreg2 != eax and breg != eax) */ + g_assert (ins->dreg == AMD64_RAX); + + if (breg == AMD64_RAX && ins->sreg2 == AMD64_RAX) + /* Highly unlikely, but possible */ + need_push = TRUE; + /* The pushes invalidate rsp */ - if ((breg == AMD64_RAX) || (breg == AMD64_RSP)) { + if ((breg == AMD64_RAX) || need_push) { amd64_mov_reg_reg (code, AMD64_R11, breg, 8); breg = AMD64_R11; } - if (ins->dreg != AMD64_RAX) - amd64_push_reg (code, AMD64_RAX); - - /* We need the EAX reg for the cmpxchg */ + /* We need the EAX reg for the comparand */ if (ins->sreg2 == AMD64_RAX) { - amd64_push_reg (code, AMD64_RDX); - amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size); - sreg2 = AMD64_RDX; + if (breg != AMD64_R11) { + amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8); + sreg2 = AMD64_R11; + } else { + g_assert (need_push); + amd64_push_reg (code, AMD64_RDX); + amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size); + sreg2 = AMD64_RDX; + rdx_pushed = TRUE; + } } - amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size); + if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) { + if (ins->backend.data == NULL) + amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX); + else + amd64_mov_reg_imm (code, AMD64_RAX, ins->backend.data); - br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX); - amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size); - br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE); - amd64_patch (br [1], br [0]); + amd64_prefix (code, X86_LOCK_PREFIX); + amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size); + } else { + amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size); - if (ins->dreg != AMD64_RAX) { - amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size); - amd64_pop_reg (code, AMD64_RAX); + br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX); + amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size); + br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE); + amd64_patch (br [1], br [0]); } - if (ins->sreg2 != sreg2) + if (rdx_pushed) amd64_pop_reg (code, AMD64_RDX); break; @@ -4422,15 +4734,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) cpos += max_len; + last_ins = ins; last_offset = offset; } cfg->code_len = code - cfg->native_code; } 
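The OP_ATOMIC_EXCHANGE_I4/I8 case above boils down to the usual lock cmpxchg retry loop: the current value is loaded into RAX once, the locked compare-and-exchange tries to install the new value, and the code branches back to the cmpxchg while the comparison fails (cmpxchg itself refreshes RAX on failure), so the old value ends up in RAX, i.e. ins->dreg. The C sketch below expresses the same semantics; the helper name and the GCC __sync_val_compare_and_swap builtin are illustrative assumptions rather than anything this patch defines, and the explicit reload in the loop stands in for cmpxchg's implicit update of RAX.

#include <stdint.h>

/* Minimal sketch of the retry loop the JIT emits inline for
 * OP_ATOMIC_EXCHANGE_I8 (assumes a GCC-compatible compiler for the
 * __sync builtin; the generated code uses a lock-prefixed cmpxchg
 * instead of calling a helper like this). */
static int64_t
atomic_exchange_i64 (volatile int64_t *dest, int64_t new_val)
{
	int64_t old;

	do {
		/* mov rax, [breg + ins->inst_offset] — on a failed cmpxchg the
		 * hardware reloads RAX itself, so the emitted loop only jumps
		 * back to the locked instruction; the reload here is the C
		 * equivalent of that behaviour. */
		old = *dest;
		/* lock cmpxchg [breg + ins->inst_offset], sreg2 ; jne back */
	} while (__sync_val_compare_and_swap (dest, old, new_val) != old);

	/* the previous value is returned in RAX (ins->dreg) */
	return old;
}

The OP_ATOMIC_CAS_IMM_I4 case, by contrast, needs no loop at all: the immediate comparand is materialized in RAX and a single lock cmpxchg decides whether the store took place.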
+#endif /* DISABLE_JIT */ + void mono_arch_register_lowlevel_calls (void) { + /* The signature doesn't matter */ + mono_register_jit_icall (mono_amd64_throw_exception, "mono_amd64_throw_exception", mono_create_icall_signature ("void"), TRUE); } void @@ -4497,6 +4814,25 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono } } +static int +get_max_epilog_size (MonoCompile *cfg) +{ + int max_epilog_size = 16; + + if (cfg->method->save_lmf) + max_epilog_size += 256; + + if (mono_jit_trace_calls != NULL) + max_epilog_size += 50; + + if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE) + max_epilog_size += 50; + + max_epilog_size += (AMD64_NREG * 2); + + return max_epilog_size; +} + /* * This macro is used for testing whenever the unwinder works correctly at every point * where an async exception can happen. @@ -4517,7 +4853,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) MonoBasicBlock *bb; MonoMethodSignature *sig; MonoInst *ins; - int alloc_size, pos, max_offset, i, quad; + int alloc_size, pos, max_offset, i, quad, max_epilog_size; guint8 *code; CallInfo *cinfo; gint32 lmf_offset = cfg->arch.lmf_offset; @@ -4540,9 +4876,11 @@ mono_arch_emit_prolog (MonoCompile *cfg) * - push rbp, mov rbp, rsp * - save callee saved regs using pushes * - allocate frame + * - save rgctx if needed * - save lmf if needed * FP not present: * - allocate frame + * - save rgctx if needed * - save lmf if needed * - save callee saved regs using moves */ @@ -4552,8 +4890,15 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (!cfg->arch.omit_fp) { amd64_push_reg (code, AMD64_RBP); async_exc_point (code); +#ifdef PLATFORM_WIN32 + mono_arch_unwindinfo_add_push_nonvol (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP); +#endif + amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer)); async_exc_point (code); +#ifdef PLATFORM_WIN32 + mono_arch_unwindinfo_add_set_fpreg (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP); +#endif } /* Save callee saved registers */ @@ -4591,12 +4936,21 @@ mono_arch_emit_prolog (MonoCompile *cfg) while (remaining_size >= 0x1000) { amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000); async_exc_point (code); +#ifdef PLATFORM_WIN32 + if (cfg->arch.omit_fp) + mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, 0x1000); +#endif + amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP); remaining_size -= 0x1000; } if (remaining_size) { amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size); async_exc_point (code); +#ifdef PLATFORM_WIN32 + if (cfg->arch.omit_fp) + mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, remaining_size); +#endif } #else amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size); @@ -4633,7 +4987,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) /* Save callee saved registers */ if (cfg->arch.omit_fp && !method->save_lmf) { - gint32 save_area_offset = 0; + gint32 save_area_offset = cfg->arch.reg_save_area_offset; /* Save caller saved registers after sp is adjusted */ /* The registers are saved at the bottom of the frame */ @@ -4646,10 +5000,20 @@ mono_arch_emit_prolog (MonoCompile *cfg) } } + /* store runtime generic context */ + if (cfg->rgctx_var) { + g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && + (cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP)); + + amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 8); + } + /* compute max_offset in order to use short 
forward jumps */ max_offset = 0; + max_epilog_size = get_max_epilog_size (cfg); if (cfg->opt & MONO_OPT_BRANCH) { for (bb = cfg->bb_entry; bb; bb = bb->next_bb) { + MonoInst *ins; bb->max_offset = max_offset; if (cfg->prof_options & MONO_PROFILE_COVERAGE) @@ -4664,6 +5028,10 @@ mono_arch_emit_prolog (MonoCompile *cfg) max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN]; } + + if (mono_jit_trace_calls && bb == cfg->bb_exit) + /* The tracing code can be quite large */ + max_offset += max_epilog_size; } } @@ -4697,6 +5065,38 @@ mono_arch_emit_prolog (MonoCompile *cfg) stack_offset = ainfo->offset + ARGS_OFFSET; + if (cfg->globalra) { + /* All the other moves are done by the register allocator */ + switch (ainfo->storage) { + case ArgInFloatSSEReg: + amd64_sse_cvtss2sd_reg_reg (code, ainfo->reg, ainfo->reg); + break; + case ArgValuetypeInReg: + for (quad = 0; quad < 2; quad ++) { + switch (ainfo->pair_storage [quad]) { + case ArgInIReg: + amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer)); + break; + case ArgInFloatSSEReg: + amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]); + break; + case ArgInDoubleSSEReg: + amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]); + break; + case ArgNone: + break; + default: + g_assert_not_reached (); + } + } + break; + default: + break; + } + + continue; + } + /* Save volatile arguments to the stack */ if (ins->opcode != OP_REGVAR) { switch (ainfo->storage) { @@ -4742,6 +5142,10 @@ mono_arch_emit_prolog (MonoCompile *cfg) } } break; + case ArgValuetypeAddrInIReg: + if (ainfo->pair_storage [0] == ArgInIReg) + amd64_mov_membase_reg (code, ins->inst_left->inst_basereg, ins->inst_left->inst_offset, ainfo->pair_regs [0], sizeof (gpointer)); + break; default: break; } @@ -4773,7 +5177,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) { guint8 *buf, *no_domain_branch; - code = emit_tls_get (code, AMD64_RAX, appdomain_tls_offset); + code = mono_amd64_emit_tls_get (code, AMD64_RAX, appdomain_tls_offset); if ((domain >> 32) == 0) amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4); else @@ -4781,20 +5185,27 @@ mono_arch_emit_prolog (MonoCompile *cfg) amd64_alu_reg_reg (code, X86_CMP, AMD64_RAX, AMD64_ARG_REG1); no_domain_branch = code; x86_branch8 (code, X86_CC_NE, 0, 0); - code = emit_tls_get ( code, AMD64_RAX, lmf_addr_tls_offset); + code = mono_amd64_emit_tls_get ( code, AMD64_RAX, lmf_addr_tls_offset); amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX); buf = code; x86_branch8 (code, X86_CC_NE, 0, 0); amd64_patch (no_domain_branch, code); - code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach"); + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, + (gpointer)"mono_jit_thread_attach", TRUE); amd64_patch (buf, code); +#ifdef PLATFORM_WIN32 + /* The TLS key actually contains a pointer to the MonoJitTlsData structure */ + /* FIXME: Add a separate key for LMF to avoid this */ + amd64_alu_reg_imm (code, X86_ADD, AMD64_RAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf)); +#endif } else { g_assert (!cfg->compile_aot); if ((domain >> 32) == 0) amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4); else amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8); - code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
(gpointer)"mono_jit_thread_attach"); + code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, + (gpointer)"mono_jit_thread_attach", TRUE); } } @@ -4822,7 +5233,12 @@ mono_arch_emit_prolog (MonoCompile *cfg) } else { if (lmf_addr_tls_offset != -1) { /* Load lmf quicky using the FS register */ - code = emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset); + code = mono_amd64_emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset); +#ifdef PLATFORM_WIN32 + /* The TLS key actually contains a pointer to the MonoJitTlsData structure */ + /* FIXME: Add a separate key for LMF to avoid this */ + amd64_alu_reg_imm (code, X86_ADD, AMD64_RAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf)); +#endif } else { /* @@ -4831,7 +5247,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) */ args_clobbered = TRUE; code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, - (gpointer)"mono_get_lmf_addr"); + (gpointer)"mono_get_lmf_addr", TRUE); } /* Save lmf_addr */ @@ -4863,10 +5279,10 @@ mono_arch_emit_prolog (MonoCompile *cfg) MonoBasicBlock *first_bb = cfg->bb_entry; MonoInst *next; - next = mono_inst_list_first (&first_bb->ins_list); + next = mono_bb_first_ins (first_bb); if (!next && first_bb->next_bb) { first_bb = first_bb->next_bb; - next = mono_inst_list_first (&first_bb->ins_list); + next = mono_bb_first_ins (first_bb); } if (first_bb->in_count > 1) @@ -4912,7 +5328,8 @@ mono_arch_emit_prolog (MonoCompile *cfg) } if (match) { - next = mono_inst_list_next (&next->node, &first_bb->ins_list); + next = next->next; + //next = mono_inst_list_next (&next->node, &first_bb->ins_list); if (!next) break; } @@ -4932,20 +5349,11 @@ mono_arch_emit_epilog (MonoCompile *cfg) MonoMethod *method = cfg->method; int quad, pos, i; guint8 *code; - int max_epilog_size = 16; + int max_epilog_size; CallInfo *cinfo; gint32 lmf_offset = cfg->arch.lmf_offset; - if (cfg->method->save_lmf) - max_epilog_size += 256; - - if (mono_jit_trace_calls != NULL) - max_epilog_size += 50; - - if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE) - max_epilog_size += 50; - - max_epilog_size += (AMD64_NREG * 2); + max_epilog_size = get_max_epilog_size (cfg); while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) { cfg->code_size *= 2; @@ -4962,6 +5370,23 @@ mono_arch_emit_epilog (MonoCompile *cfg) pos = 0; if (method->save_lmf) { + /* check if we need to restore protection of the stack after a stack overflow */ + if (mono_get_jit_tls_offset () != -1) { + guint8 *patch; + code = mono_amd64_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ()); + /* we load the value in a separate instruction: this mechanism may be + * used later as a safer way to do thread interruption + */ + amd64_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 8); + x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0); + patch = code; + x86_branch8 (code, X86_CC_Z, 0, FALSE); + /* note that the call trampoline will preserve eax/edx */ + x86_call_reg (code, X86_ECX); + x86_patch (patch, code); + } else { + /* FIXME: maybe save the jit tls in the prolog */ + } if ((lmf_tls_offset != -1) && !optimize_for_xen) { /* * Optimized version which uses the mono_lmf TLS variable instead of indirection @@ -5000,7 +5425,7 @@ mono_arch_emit_epilog (MonoCompile *cfg) } else { if (cfg->arch.omit_fp) { - gint32 save_area_offset = 0; + gint32 save_area_offset = cfg->arch.reg_save_area_offset; for (i = 0; i < AMD64_NREG; ++i) if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) { @@ -5145,11 +5570,10 @@ mono_arch_emit_exceptions (MonoCompile 
*cfg) exc_throw_start [nthrows] = code; } amd64_mov_reg_imm (code, AMD64_ARG_REG1, exc_class->type_token); - patch_info->data.name = "mono_arch_throw_corlib_exception"; - patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD; - patch_info->ip.i = code - cfg->native_code; - code = emit_call_body (cfg, code, patch_info->type, patch_info->data.name); + patch_info->type = MONO_PATCH_INFO_NONE; + + code = emit_call_body (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_arch_throw_corlib_exception"); amd64_mov_reg_imm (buf, AMD64_ARG_REG2, (code - cfg->native_code) - throw_ip); while (buf < buf2) @@ -5177,24 +5601,15 @@ mono_arch_emit_exceptions (MonoCompile *cfg) case MONO_PATCH_INFO_R4: { guint8 *pos; - if (use_sse2) { - /* The SSE opcodes require a 16 byte alignment */ - code = (guint8*)ALIGN_TO (code, 16); - } else { - code = (guint8*)ALIGN_TO (code, 8); - } + /* The SSE opcodes require a 16 byte alignment */ + code = (guint8*)ALIGN_TO (code, 16); pos = cfg->native_code + patch_info->ip.i; - - if (use_sse2) { - if (IS_REX (pos [1])) - *(guint32*)(pos + 5) = (guint8*)code - pos - 9; - else - *(guint32*)(pos + 4) = (guint8*)code - pos - 8; - } else { - *(guint32*)(pos + 3) = (guint8*)code - pos - 7; - } + if (IS_REX (pos [1])) + *(guint32*)(pos + 5) = (guint8*)code - pos - 9; + else + *(guint32*)(pos + 4) = (guint8*)code - pos - 8; if (patch_info->type == MONO_PATCH_INFO_R8) { *(double*)code = *(double*)patch_info->data.target; @@ -5245,7 +5660,7 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean ena /* Allocate a new area on the stack and save arguments there */ sig = mono_method_signature (cfg->method); - cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE); + cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE); n = sig->param_count + sig->hasthis; @@ -5268,7 +5683,7 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean ena mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method); amd64_set_reg_template (code, AMD64_ARG_REG1); amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RSP, 8); - code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func); + code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE); if (enable_arguments) amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area); @@ -5290,7 +5705,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena guchar *code = p; int save_mode = SAVE_NONE; MonoMethod *method = cfg->method; - int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type; + int rtype = mini_type_get_underlying_type (NULL, mono_method_signature (method)->ret)->type; switch (rtype) { case MONO_TYPE_VOID: @@ -5360,7 +5775,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method); amd64_set_reg_template (code, AMD64_ARG_REG1); - code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func); + code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE); /* Restore result */ switch (save_mode) { @@ -5484,12 +5899,33 @@ mono_arch_get_patch_offset (guint8 *code) return 3; } +/** + * mono_breakpoint_clean_code: + * + * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software + * breakpoints in the original code, they are removed in the copy. + * + * Returns TRUE if no sw breakpoint was present. 
+ */ gboolean -mono_breakpoint_clean_code (guint8 *code, guint8 *buf, int size) +mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size) { int i; gboolean can_write = TRUE; - memcpy (buf, code, size); + /* + * If method_start is non-NULL we need to perform bound checks, since we access memory + * at code - offset we could go before the start of the method and end up in a different + * page of memory that is not mapped or read incorrect data anyway. We zero-fill the bytes + * instead. + */ + if (!method_start || code - offset >= method_start) { + memcpy (buf, code - offset, size); + } else { + int diff = code - method_start; + memset (buf, 0, size); + memcpy (buf + offset - diff, method_start, diff + size - offset); + } + code -= offset; for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) { int idx = mono_breakpoint_info_index [i]; guint8 *ptr; @@ -5514,8 +5950,8 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement) gint32 disp; guint8 rex = 0; - mono_breakpoint_clean_code (code - 10, buf, sizeof (buf)); - code = buf + 10; + mono_breakpoint_clean_code (NULL, code, 9, buf, sizeof (buf)); + code = buf + 9; *displacement = 0; @@ -5564,8 +6000,13 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement) /* call OFFSET(%rip) */ disp = *(guint32*)(code + 3); return (gpointer*)(code + disp + 7); - } - else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) { + } else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2) && (amd64_modrm_reg (code [2]) == X86_ESP) && (amd64_modrm_mod (code [2]) == 0) && (amd64_modrm_rm (code [2]) == X86_ESP)) { + /* call *[r12+disp32] */ + if (IS_REX (code [-1])) + rex = code [-1]; + reg = AMD64_RSP; + disp = *(gint32*)(code + 3); + } else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) { /* call *[reg+disp32] */ if (IS_REX (code [0])) rex = code [0]; @@ -5573,16 +6014,19 @@ mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement) disp = *(gint32*)(code + 3); /* R10 is clobbered by the IMT thunk code */ g_assert (reg != AMD64_R10); - } - else if (code [2] == 0xe8) { + } else if (code [2] == 0xe8) { /* call */ return NULL; - } - else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) { + } else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1) && (amd64_modrm_reg (code [5]) == X86_ESP) && (amd64_modrm_mod (code [5]) == 0) && (amd64_modrm_rm (code [5]) == X86_ESP)) { + /* call *[r12+disp32] */ + if (IS_REX (code [2])) + rex = code [2]; + reg = AMD64_RSP; + disp = *(gint8*)(code + 6); + } else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) { /* call *%reg */ return NULL; - } - else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) { + } else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) { /* call *[reg+disp8] */ if (IS_REX (code [3])) rex = code [3]; @@ -5625,12 +6069,17 @@ mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs) } int -mono_arch_get_this_arg_reg (MonoMethodSignature *sig) +mono_arch_get_this_arg_reg (MonoMethodSignature *sig, MonoGenericSharingContext *gsctx, guint8 *code) { int this_reg = 
AMD64_ARG_REG1; if (MONO_TYPE_ISSTRUCT (sig->ret)) { - CallInfo *cinfo = get_call_info (NULL, NULL, sig, FALSE); + CallInfo *cinfo; + + if (!gsctx && code) + gsctx = mono_get_generic_context_from_code (code); + + cinfo = get_call_info (gsctx, NULL, sig, FALSE); if (cinfo->ret.storage != ArgValuetypeInReg) this_reg = AMD64_ARG_REG2; @@ -5641,9 +6090,9 @@ mono_arch_get_this_arg_reg (MonoMethodSignature *sig) } gpointer -mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code) +mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code) { - return (gpointer)regs [mono_arch_get_this_arg_reg (sig)]; + return (gpointer)regs [mono_arch_get_this_arg_reg (sig, gsctx, code)]; } #define MAX_ARCH_DELEGATE_PARAMS 10 @@ -5663,11 +6112,9 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe if (has_target) { static guint8* cached = NULL; - mono_mini_arch_lock (); - if (cached) { - mono_mini_arch_unlock (); + + if (cached) return cached; - } start = code = mono_global_codeman_reserve (64); @@ -5678,9 +6125,11 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe g_assert ((code - start) < 64); - cached = start; mono_debug_add_delegate_trampoline (start, code - start); - mono_mini_arch_unlock (); + + mono_memory_barrier (); + + cached = start; } else { static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL}; for (i = 0; i < sig->param_count; ++i) @@ -5689,12 +6138,9 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe if (sig->param_count > 4) return NULL; - mono_mini_arch_lock (); code = cache [sig->param_count]; - if (code) { - mono_mini_arch_unlock (); + if (code) return code; - } start = code = mono_global_codeman_reserve (64); @@ -5703,17 +6149,26 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe } else { /* We have to shift the arguments left */ amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8); - for (i = 0; i < sig->param_count; ++i) + for (i = 0; i < sig->param_count; ++i) { +#ifdef PLATFORM_WIN32 + if (i < 3) + amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8); + else + amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8); +#else amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8); +#endif + } amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr)); } g_assert ((code - start) < 64); - cache [sig->param_count] = start; - mono_debug_add_delegate_trampoline (start, code - start); - mono_mini_arch_unlock (); + + mono_memory_barrier (); + + cache [sig->param_count] = start; } return start; @@ -5730,6 +6185,24 @@ void mono_arch_setup_jit_tls_data (MonoJitTlsData *tls) { if (!tls_offset_inited) { +#ifdef PLATFORM_WIN32 + /* + * We need to init this multiple times, since when we are first called, the key might not + * be initialized yet. 
+ */ + appdomain_tls_offset = mono_domain_get_tls_key (); + lmf_tls_offset = mono_get_jit_tls_key (); + thread_tls_offset = mono_thread_get_tls_key (); + lmf_addr_tls_offset = mono_get_jit_tls_key (); + + /* Only 64 tls entries can be accessed using inline code */ + if (appdomain_tls_offset >= 64) + appdomain_tls_offset = -1; + if (lmf_tls_offset >= 64) + lmf_tls_offset = -1; + if (thread_tls_offset >= 64) + thread_tls_offset = -1; +#else tls_offset_inited = TRUE; #ifdef MONO_XEN_OPT optimize_for_xen = access ("/proc/xen", F_OK) == 0; @@ -5738,6 +6211,7 @@ mono_arch_setup_jit_tls_data (MonoJitTlsData *tls) lmf_tls_offset = mono_get_lmf_tls_offset (); lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset (); thread_tls_offset = mono_thread_get_tls_offset (); +#endif } } @@ -5750,7 +6224,7 @@ void mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg) { MonoCallInst *call = (MonoCallInst*)inst; - CallInfo * cinfo = get_call_info (cfg, cfg->mempool, inst->signature, FALSE); + CallInfo * cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, inst->signature, FALSE); if (vt_reg != -1) { MonoInst *vtarg; @@ -5758,17 +6232,16 @@ mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_re if (cinfo->ret.storage == ArgValuetypeInReg) { /* * The valuetype is in RAX:RDX after the call, need to be copied to - * the stack. Push the address here, so the call instruction can + * the stack. Save the address here, so the call instruction can * access it. */ - MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH); - vtarg->sreg1 = vt_reg; - mono_bblock_add_inst (cfg->cbb, vtarg); + MonoInst *loc = cfg->arch.vret_addr_loc; - /* Align stack */ - MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8); - } - else { + g_assert (loc); + g_assert (loc->opcode == OP_REGOFFSET); + + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, loc->inst_basereg, loc->inst_offset, vt_reg); + } else { MONO_INST_NEW (cfg, vtarg, OP_MOVE); vtarg->sreg1 = vt_reg; vtarg->dreg = mono_regstate_next_int (cfg->rs); @@ -5814,7 +6287,8 @@ imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target) * LOCKING: called with the domain lock held */ gpointer -mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count) +mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count, + gpointer fail_tramp) { int i; int size = 0; @@ -5826,7 +6300,7 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI if (item->is_equals) { if (item->check_target_idx) { if (!item->compare_done) { - if (amd64_is_imm32 (item->method)) + if (amd64_is_imm32 (item->key)) item->chunk_size += CMP_SIZE; else item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE; @@ -5837,17 +6311,22 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI item->chunk_size += MOV_REG_IMM_SIZE; item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE; } else { - if (vtable_is_32bit) - item->chunk_size += MOV_REG_IMM_32BIT_SIZE; - else - item->chunk_size += MOV_REG_IMM_SIZE; - item->chunk_size += JUMP_REG_SIZE; - /* with assert below: - * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1; - */ + if (fail_tramp) { + item->chunk_size += MOV_REG_IMM_SIZE * 3 + CMP_REG_REG_SIZE + + BR_SMALL_SIZE + JUMP_REG_SIZE * 2; + } else { + if (vtable_is_32bit) + item->chunk_size += MOV_REG_IMM_32BIT_SIZE; + else + item->chunk_size += MOV_REG_IMM_SIZE; + 
item->chunk_size += JUMP_REG_SIZE; + /* with assert below: + * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1; + */ + } } } else { - if (amd64_is_imm32 (item->method)) + if (amd64_is_imm32 (item->key)) item->chunk_size += CMP_SIZE; else item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE; @@ -5856,7 +6335,10 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI } size += item->chunk_size; } - code = mono_code_manager_reserve (domain->code_mp, size); + if (fail_tramp) + code = mono_method_alloc_generic_virtual_thunk (domain, size); + else + code = mono_code_manager_reserve (domain->code_mp, size); start = code; for (i = 0; i < count; ++i) { MonoIMTCheckItem *item = imt_entries [i]; @@ -5864,43 +6346,73 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI if (item->is_equals) { if (item->check_target_idx) { if (!item->compare_done) { - if (amd64_is_imm32 (item->method)) - amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method); + if (amd64_is_imm32 (item->key)) + amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); else { - amd64_mov_reg_imm (code, AMD64_R10, item->method); + amd64_mov_reg_imm (code, AMD64_R10, item->key); amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10); } } item->jmp_code = code; amd64_branch8 (code, X86_CC_NE, 0, FALSE); - amd64_mov_reg_imm (code, AMD64_R11, & (vtable->vtable [item->vtable_slot])); - amd64_jump_membase (code, AMD64_R11, 0); + /* See the comment below about R10 */ + if (fail_tramp) { + amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code); + amd64_jump_reg (code, AMD64_R10); + } else { + amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot])); + amd64_jump_membase (code, AMD64_R10, 0); + } } else { - /* enable the commented code to assert on wrong method */ + if (fail_tramp) { + if (amd64_is_imm32 (item->key)) + amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); + else { + amd64_mov_reg_imm (code, AMD64_R10, item->key); + amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10); + } + item->jmp_code = code; + amd64_branch8 (code, X86_CC_NE, 0, FALSE); + amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code); + amd64_jump_reg (code, AMD64_R10); + amd64_patch (item->jmp_code, code); + amd64_mov_reg_imm (code, AMD64_R10, fail_tramp); + amd64_jump_reg (code, AMD64_R10); + item->jmp_code = NULL; + + } else { + /* enable the commented code to assert on wrong method */ #if 0 - if (amd64_is_imm32 (item->method)) - amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method); - else { - amd64_mov_reg_imm (code, AMD64_R10, item->method); - amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10); - } - item->jmp_code = code; - amd64_branch8 (code, X86_CC_NE, 0, FALSE); - amd64_mov_reg_imm (code, AMD64_R11, & (vtable->vtable [item->vtable_slot])); - amd64_jump_membase (code, AMD64_R11, 0); - amd64_patch (item->jmp_code, code); - amd64_breakpoint (code); - item->jmp_code = NULL; + if (amd64_is_imm32 (item->key)) + amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); + else { + amd64_mov_reg_imm (code, AMD64_R10, item->key); + amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10); + } + item->jmp_code = code; + amd64_branch8 (code, X86_CC_NE, 0, FALSE); + /* See the comment below about R10 */ + amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot])); + 
amd64_jump_membase (code, AMD64_R10, 0); + amd64_patch (item->jmp_code, code); + amd64_breakpoint (code); + item->jmp_code = NULL; #else - amd64_mov_reg_imm (code, AMD64_R11, & (vtable->vtable [item->vtable_slot])); - amd64_jump_membase (code, AMD64_R11, 0); + /* We're using R10 here because R11 + needs to be preserved. R10 needs + to be preserved for calls which + require a runtime generic context, + but interface calls don't. */ + amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot])); + amd64_jump_membase (code, AMD64_R10, 0); #endif + } } } else { - if (amd64_is_imm32 (item->method)) - amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method); + if (amd64_is_imm32 (item->key)) + amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key); else { - amd64_mov_reg_imm (code, AMD64_R10, item->method); + amd64_mov_reg_imm (code, AMD64_R10, item->key); amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10); } item->jmp_code = code; @@ -5920,8 +6432,9 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI } } } - - mono_stats.imt_thunks_size += code - start; + + if (!fail_tramp) + mono_stats.imt_thunks_size += code - start; g_assert (code - start <= size); return start; @@ -5930,58 +6443,28 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI MonoMethod* mono_arch_find_imt_method (gpointer *regs, guint8 *code) { - /* - * R11 is clobbered by the trampoline code, so we have to retrieve the method - * from the code. - * 41 bb c0 f7 89 00 mov $0x89f7c0,%r11d - * ff 90 68 ff ff ff callq *0xffffffffffffff68(%rax) - */ - /* Similar to get_vcall_slot_addr () */ - - /* Find the start of the call instruction */ - code -= 7; - if ((code [-2] == 0x41) && (code [-1] == 0xbb) && (code [4] == 0xff) && (x86_modrm_mod (code [5]) == 1) && (x86_modrm_reg (code [5]) == 2) && ((signed char)code [6] < 0)) { - /* IMT-based interface calls - * 41 bb 14 f8 28 08 mov $0x828f814,%r11 - * ff 50 fc call *0xfffffffc(%rax) - */ - code += 4; - } else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) { - /* call *[reg+disp32] */ - code += 1; - } else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) { - /* call *[reg+disp8] */ - code += 4; - } else - g_assert_not_reached (); - - /* Find the start of the mov instruction */ - code -= 10; - if (code [0] == 0x49 && code [1] == 0xbb) { - return (MonoMethod*)*(gssize*)(code + 2); - } else if (code [3] == 0x4d && code [4] == 0x8b && code [5] == 0x1d) { - /* mov (%rip),%r11 */ - return (MonoMethod*)*(gssize*)(code + 10 + *(guint32*)(code + 6)); - } else if (code [4] == 0x41 && code [5] == 0xbb) { - return (MonoMethod*)(gssize)*(guint32*)(code + 6); - } else { - int i; - - printf ("Unknown call sequence: "); - for (i = -10; i < 20; ++i) - printf ("%x ", code [i]); - g_assert_not_reached (); - return NULL; - } + return regs [MONO_ARCH_IMT_REG]; } MonoObject* mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx) { - return mono_arch_get_this_arg_from_call (mono_method_signature (method), (gssize*)regs, NULL); + return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL); +} + +void +mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg) +{ + /* Done by the implementation of the CALL_MEMBASE opcodes */ } #endif +MonoVTable* 
+mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code) +{ + return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG]; +} + MonoInst* mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { @@ -5994,16 +6477,6 @@ mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethod } else if (strcmp (cmethod->name, "Cos") == 0) { MONO_INST_NEW (cfg, ins, OP_COS); ins->inst_i0 = args [0]; - } else if (strcmp (cmethod->name, "Tan") == 0) { - if (use_sse2) - return ins; - MONO_INST_NEW (cfg, ins, OP_TAN); - ins->inst_i0 = args [0]; - } else if (strcmp (cmethod->name, "Atan") == 0) { - if (use_sse2) - return ins; - MONO_INST_NEW (cfg, ins, OP_ATAN); - ins->inst_i0 = args [0]; } else if (strcmp (cmethod->name, "Sqrt") == 0) { MONO_INST_NEW (cfg, ins, OP_SQRT); ins->inst_i0 = args [0]; @@ -6018,13 +6491,21 @@ mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethod if (strcmp (cmethod->name, "Min") == 0) { if (fsig->params [0]->type == MONO_TYPE_I4) opcode = OP_IMIN; + if (fsig->params [0]->type == MONO_TYPE_U4) + opcode = OP_IMIN_UN; else if (fsig->params [0]->type == MONO_TYPE_I8) opcode = OP_LMIN; + else if (fsig->params [0]->type == MONO_TYPE_U8) + opcode = OP_LMIN_UN; } else if (strcmp (cmethod->name, "Max") == 0) { if (fsig->params [0]->type == MONO_TYPE_I4) opcode = OP_IMAX; + if (fsig->params [0]->type == MONO_TYPE_U4) + opcode = OP_IMAX_UN; else if (fsig->params [0]->type == MONO_TYPE_I8) opcode = OP_LMAX; + else if (fsig->params [0]->type == MONO_TYPE_U8) + opcode = OP_LMAX_UN; } if (opcode) { @@ -6042,18 +6523,86 @@ mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethod ins->inst_i1 = args [1]; } #endif - } else if(cmethod->klass->image == mono_defaults.corlib && - (strcmp (cmethod->klass->name_space, "System.Threading") == 0) && - (strcmp (cmethod->klass->name, "Interlocked") == 0)) { - /* - * Can't implement CompareExchange methods this way since they have - * three arguments. 
- */ } return ins; } +MonoInst* +mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) +{ + MonoInst *ins = NULL; + int opcode = 0; + + if (cmethod->klass == mono_defaults.math_class) { + if (strcmp (cmethod->name, "Sin") == 0) { + opcode = OP_SIN; + } else if (strcmp (cmethod->name, "Cos") == 0) { + opcode = OP_COS; + } else if (strcmp (cmethod->name, "Sqrt") == 0) { + opcode = OP_SQRT; + } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) { + opcode = OP_ABS; + } + + if (opcode) { + MONO_INST_NEW (cfg, ins, opcode); + ins->type = STACK_R8; + ins->dreg = mono_alloc_freg (cfg); + ins->sreg1 = args [0]->dreg; + MONO_ADD_INS (cfg->cbb, ins); + } + + opcode = 0; + if (cfg->opt & MONO_OPT_CMOV) { + if (strcmp (cmethod->name, "Min") == 0) { + if (fsig->params [0]->type == MONO_TYPE_I4) + opcode = OP_IMIN; + if (fsig->params [0]->type == MONO_TYPE_U4) + opcode = OP_IMIN_UN; + else if (fsig->params [0]->type == MONO_TYPE_I8) + opcode = OP_LMIN; + else if (fsig->params [0]->type == MONO_TYPE_U8) + opcode = OP_LMIN_UN; + } else if (strcmp (cmethod->name, "Max") == 0) { + if (fsig->params [0]->type == MONO_TYPE_I4) + opcode = OP_IMAX; + if (fsig->params [0]->type == MONO_TYPE_U4) + opcode = OP_IMAX_UN; + else if (fsig->params [0]->type == MONO_TYPE_I8) + opcode = OP_LMAX; + else if (fsig->params [0]->type == MONO_TYPE_U8) + opcode = OP_LMAX_UN; + } + } + + if (opcode) { + MONO_INST_NEW (cfg, ins, opcode); + ins->type = fsig->params [0]->type == MONO_TYPE_I4 ? STACK_I4 : STACK_I8; + ins->dreg = mono_alloc_ireg (cfg); + ins->sreg1 = args [0]->dreg; + ins->sreg2 = args [1]->dreg; + MONO_ADD_INS (cfg->cbb, ins); + } + +#if 0 + /* OP_FREM is not IEEE compatible */ + else if (strcmp (cmethod->name, "IEEERemainder") == 0) { + MONO_INST_NEW (cfg, ins, OP_FREM); + ins->inst_i0 = args [0]; + ins->inst_i1 = args [1]; + } +#endif + } + + /* + * Can't implement CompareExchange methods this way since they have + * three arguments. + */ + + return ins; +} + gboolean mono_arch_print_tree (MonoInst *tree, int arity) {