/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>

/* The angle-bracket header names were lost when this file was extracted; the
 * set below is an assumption based on the identifiers used in this file. */
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/marshal.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-x86.h"
#include "inssel.h"
#include "cpu-pentium.h"

/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* TRUE by default until we add runtime detection of Xen */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()

const char*
mono_arch_regname (int reg) {
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}

const char*
mono_arch_fregname (int reg) {
	return "unknown";
}

typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;

#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif

static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}

static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ?
8 : 4; } else { /* A double register */ if (is_double) ainfo->storage = ArgInDoubleSSEReg; else ainfo->storage = ArgInFloatSSEReg; ainfo->reg = *gr; (*gr) += 1; } } static void add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type, gboolean is_return, guint32 *gr, guint32 *fr, guint32 *stack_size) { guint32 size; MonoClass *klass; klass = mono_class_from_mono_type (type); if (sig->pinvoke) size = mono_type_native_stack_size (&klass->byval_arg, NULL); else size = mono_type_stack_size (&klass->byval_arg, NULL); #ifdef PLATFORM_WIN32 if (sig->pinvoke && is_return) { MonoMarshalType *info; /* * the exact rules are not very well documented, the code below seems to work with the * code generated by gcc 3.3.3 -mno-cygwin. */ info = mono_marshal_load_type_info (klass); g_assert (info); ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone; /* Special case structs with only a float member */ if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) { ainfo->storage = ArgValuetypeInReg; ainfo->pair_storage [0] = ArgOnDoubleFpStack; return; } if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) { ainfo->storage = ArgValuetypeInReg; ainfo->pair_storage [0] = ArgOnFloatFpStack; return; } if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) { ainfo->storage = ArgValuetypeInReg; ainfo->pair_storage [0] = ArgInIReg; ainfo->pair_regs [0] = return_regs [0]; if (info->native_size > 4) { ainfo->pair_storage [1] = ArgInIReg; ainfo->pair_regs [1] = return_regs [1]; } return; } } #endif ainfo->offset = *stack_size; ainfo->storage = ArgOnStack; *stack_size += ALIGN_TO (size, sizeof (gpointer)); } /* * get_call_info: * * Obtain information about a call according to the calling convention. * For x86 ELF, see the "System V Application Binary Interface Intel386 * Architecture Processor Supplment, Fourth Edition" document for more * information. * For x86 win32, see ???. 
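 *
 * In the code below every argument ends up with ArgOnStack storage (PARAM_REGS
 * and FLOAT_PARAM_REGS are 0) and stack slots are rounded up to gpointer size;
 * valuetype returns that do not hit the ArgValuetypeInReg special cases are
 * returned through a hidden pointer whose stack slot is reserved ahead of
 * 'this' and the explicit arguments.
 *
 * Callers in this file use the result roughly as in this sketch (variable
 * names here are illustrative only, the real call sites differ in detail):
 *
 *   CallInfo *cinfo = get_call_info (sig, FALSE);
 *   int first_offset = cinfo->args [0].offset;   // stack offset of argument 0
 *   guint32 call_bytes = cinfo->stack_usage;     // bytes pushed for the call
 *   g_free (cinfo);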
*/ static CallInfo* get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke) { guint32 i, gr, fr; MonoType *ret_type; int n = sig->hasthis + sig->param_count; guint32 stack_size = 0; CallInfo *cinfo; cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n)); gr = 0; fr = 0; /* return value */ { ret_type = mono_type_get_underlying_type (sig->ret); switch (ret_type->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_I1: case MONO_TYPE_U1: case MONO_TYPE_I2: case MONO_TYPE_U2: case MONO_TYPE_CHAR: case MONO_TYPE_I4: case MONO_TYPE_U4: case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_PTR: case MONO_TYPE_FNPTR: case MONO_TYPE_CLASS: case MONO_TYPE_OBJECT: case MONO_TYPE_SZARRAY: case MONO_TYPE_ARRAY: case MONO_TYPE_STRING: cinfo->ret.storage = ArgInIReg; cinfo->ret.reg = X86_EAX; break; case MONO_TYPE_U8: case MONO_TYPE_I8: cinfo->ret.storage = ArgInIReg; cinfo->ret.reg = X86_EAX; break; case MONO_TYPE_R4: cinfo->ret.storage = ArgOnFloatFpStack; break; case MONO_TYPE_R8: cinfo->ret.storage = ArgOnDoubleFpStack; break; case MONO_TYPE_GENERICINST: if (!mono_type_generic_inst_is_valuetype (sig->ret)) { cinfo->ret.storage = ArgInIReg; cinfo->ret.reg = X86_EAX; break; } /* Fall through */ case MONO_TYPE_VALUETYPE: { guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0; add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize); if (cinfo->ret.storage == ArgOnStack) /* The caller passes the address where the value is stored */ add_general (&gr, &stack_size, &cinfo->ret); break; } case MONO_TYPE_TYPEDBYREF: /* Same as a valuetype with size 24 */ add_general (&gr, &stack_size, &cinfo->ret); ; break; case MONO_TYPE_VOID: cinfo->ret.storage = ArgNone; break; default: g_error ("Can't handle as return value 0x%x", sig->ret->type); } } /* this */ if (sig->hasthis) add_general (&gr, &stack_size, cinfo->args + 0); if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) { gr = PARAM_REGS; fr = FLOAT_PARAM_REGS; /* Emit the signature cookie just before the implicit arguments */ add_general (&gr, &stack_size, &cinfo->sig_cookie); } for (i = 0; i < sig->param_count; ++i) { ArgInfo *ainfo = &cinfo->args [sig->hasthis + i]; MonoType *ptype; if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) { /* We allways pass the sig cookie on the stack for simplicity */ /* * Prevent implicit arguments + the sig cookie from being passed * in registers. 
*/ gr = PARAM_REGS; fr = FLOAT_PARAM_REGS; /* Emit the signature cookie just before the implicit arguments */ add_general (&gr, &stack_size, &cinfo->sig_cookie); } if (sig->params [i]->byref) { add_general (&gr, &stack_size, ainfo); continue; } ptype = mono_type_get_underlying_type (sig->params [i]); switch (ptype->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_I1: case MONO_TYPE_U1: add_general (&gr, &stack_size, ainfo); break; case MONO_TYPE_I2: case MONO_TYPE_U2: case MONO_TYPE_CHAR: add_general (&gr, &stack_size, ainfo); break; case MONO_TYPE_I4: case MONO_TYPE_U4: add_general (&gr, &stack_size, ainfo); break; case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_PTR: case MONO_TYPE_FNPTR: case MONO_TYPE_CLASS: case MONO_TYPE_OBJECT: case MONO_TYPE_STRING: case MONO_TYPE_SZARRAY: case MONO_TYPE_ARRAY: add_general (&gr, &stack_size, ainfo); break; case MONO_TYPE_GENERICINST: if (!mono_type_generic_inst_is_valuetype (sig->params [i])) { add_general (&gr, &stack_size, ainfo); break; } /* Fall through */ case MONO_TYPE_VALUETYPE: add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size); break; case MONO_TYPE_TYPEDBYREF: stack_size += sizeof (MonoTypedRef); ainfo->storage = ArgOnStack; break; case MONO_TYPE_U8: case MONO_TYPE_I8: add_general_pair (&gr, &stack_size, ainfo); break; case MONO_TYPE_R4: add_float (&fr, &stack_size, ainfo, FALSE); break; case MONO_TYPE_R8: add_float (&fr, &stack_size, ainfo, TRUE); break; default: g_error ("unexpected type 0x%x", ptype->type); g_assert_not_reached (); } } if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) { gr = PARAM_REGS; fr = FLOAT_PARAM_REGS; /* Emit the signature cookie just before the implicit arguments */ add_general (&gr, &stack_size, &cinfo->sig_cookie); } #if defined(__APPLE__) if ((stack_size % 16) != 0) { cinfo->need_stack_align = TRUE; stack_size += cinfo->stack_align_amount = 16-(stack_size % 16); } #endif cinfo->stack_usage = stack_size; cinfo->reg_usage = gr; cinfo->freg_usage = fr; return cinfo; } /* * mono_arch_get_argument_info: * @csig: a method signature * @param_count: the number of parameters to consider * @arg_info: an array to store the result infos * * Gathers information on parameters such as size, alignment and * padding. arg_info should be large enought to hold param_count + 1 entries. * * Returns the size of the activation frame. 
*/ int mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info) { int k, frame_size = 0; int size, pad; guint32 align; int offset = 8; CallInfo *cinfo; cinfo = get_call_info (csig, FALSE); if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) { frame_size += sizeof (gpointer); offset += 4; } arg_info [0].offset = offset; if (csig->hasthis) { frame_size += sizeof (gpointer); offset += 4; } arg_info [0].size = frame_size; for (k = 0; k < param_count; k++) { if (csig->pinvoke) size = mono_type_native_stack_size (csig->params [k], &align); else { int ialign; size = mono_type_stack_size (csig->params [k], &ialign); align = ialign; } /* ignore alignment for now */ align = 1; frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); arg_info [k].pad = pad; frame_size += size; arg_info [k + 1].pad = 0; arg_info [k + 1].size = size; offset += pad; arg_info [k + 1].offset = offset; offset += size; } align = MONO_ARCH_FRAME_ALIGNMENT; frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); arg_info [k].pad = pad; g_free (cinfo); return frame_size; } static const guchar cpuid_impl [] = { 0x55, /* push %ebp */ 0x89, 0xe5, /* mov %esp,%ebp */ 0x53, /* push %ebx */ 0x8b, 0x45, 0x08, /* mov 0x8(%ebp),%eax */ 0x0f, 0xa2, /* cpuid */ 0x50, /* push %eax */ 0x8b, 0x45, 0x10, /* mov 0x10(%ebp),%eax */ 0x89, 0x18, /* mov %ebx,(%eax) */ 0x8b, 0x45, 0x14, /* mov 0x14(%ebp),%eax */ 0x89, 0x08, /* mov %ecx,(%eax) */ 0x8b, 0x45, 0x18, /* mov 0x18(%ebp),%eax */ 0x89, 0x10, /* mov %edx,(%eax) */ 0x58, /* pop %eax */ 0x8b, 0x55, 0x0c, /* mov 0xc(%ebp),%edx */ 0x89, 0x02, /* mov %eax,(%edx) */ 0x5b, /* pop %ebx */ 0xc9, /* leave */ 0xc3, /* ret */ }; typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx); static int cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx) { int have_cpuid = 0; #ifndef _MSC_VER __asm__ __volatile__ ( "pushfl\n" "popl %%eax\n" "movl %%eax, %%edx\n" "xorl $0x200000, %%eax\n" "pushl %%eax\n" "popfl\n" "pushfl\n" "popl %%eax\n" "xorl %%edx, %%eax\n" "andl $0x200000, %%eax\n" "movl %%eax, %0" : "=r" (have_cpuid) : : "%eax", "%edx" ); #else __asm { pushfd pop eax mov edx, eax xor eax, 0x200000 push eax popfd pushfd pop eax xor eax, edx and eax, 0x200000 mov have_cpuid, eax } #endif if (have_cpuid) { /* Have to use the code manager to get around WinXP DEP */ MonoCodeManager *codeman = mono_code_manager_new_dynamic (); CpuidFunc func; void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl)); memcpy (ptr, cpuid_impl, sizeof (cpuid_impl)); func = (CpuidFunc)ptr; func (id, p_eax, p_ebx, p_ecx, p_edx); mono_code_manager_destroy (codeman); /* * We use this approach because of issues with gcc and pic code, see: * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329 __asm__ __volatile__ ("cpuid" : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx) : "a" (id)); */ return 1; } return 0; } /* * Initialize the cpu to execute managed code. */ void mono_arch_cpu_init (void) { /* spec compliance requires running with double precision */ #ifndef _MSC_VER guint16 fpcw; __asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw)); fpcw &= ~X86_FPCW_PRECC_MASK; fpcw |= X86_FPCW_PREC_DOUBLE; __asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw)); __asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw)); #else _control87 (_PC_53, MCW_PC); #endif } /* * This function returns the optimizations supported on this cpu. 
*/ guint32 mono_arch_cpu_optimizazions (guint32 *exclude_mask) { int eax, ebx, ecx, edx; guint32 opts = 0; *exclude_mask = 0; /* Feature Flags function, flags returned in EDX. */ if (cpuid (1, &eax, &ebx, &ecx, &edx)) { if (edx & (1 << 15)) { opts |= MONO_OPT_CMOV; if (edx & 1) opts |= MONO_OPT_FCMOV; else *exclude_mask |= MONO_OPT_FCMOV; } else *exclude_mask |= MONO_OPT_CMOV; } return opts; } /* * Determine whenever the trap whose info is in SIGINFO is caused by * integer overflow. */ gboolean mono_arch_is_int_overflow (void *sigctx, void *info) { MonoContext ctx; guint8* ip; mono_arch_sigctx_to_monoctx (sigctx, &ctx); ip = (guint8*)ctx.eip; if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) { gint32 reg; /* idiv REG */ switch (x86_modrm_rm (ip [1])) { case X86_EAX: reg = ctx.eax; break; case X86_ECX: reg = ctx.ecx; break; case X86_EDX: reg = ctx.edx; break; case X86_EBX: reg = ctx.ebx; break; case X86_ESI: reg = ctx.esi; break; case X86_EDI: reg = ctx.edi; break; default: g_assert_not_reached (); reg = -1; } if (reg == -1) return TRUE; } return FALSE; } static gboolean is_regsize_var (MonoType *t) { if (t->byref) return TRUE; switch (mono_type_get_underlying_type (t)->type) { case MONO_TYPE_I4: case MONO_TYPE_U4: case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_PTR: case MONO_TYPE_FNPTR: return TRUE; case MONO_TYPE_OBJECT: case MONO_TYPE_STRING: case MONO_TYPE_CLASS: case MONO_TYPE_SZARRAY: case MONO_TYPE_ARRAY: return TRUE; case MONO_TYPE_GENERICINST: if (!mono_type_generic_inst_is_valuetype (t)) return TRUE; return FALSE; case MONO_TYPE_VALUETYPE: return FALSE; } return FALSE; } GList * mono_arch_get_allocatable_int_vars (MonoCompile *cfg) { GList *vars = NULL; int i; for (i = 0; i < cfg->num_varinfo; i++) { MonoInst *ins = cfg->varinfo [i]; MonoMethodVar *vmv = MONO_VARINFO (cfg, i); /* unused vars */ if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos) continue; if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG)) continue; /* we dont allocate I1 to registers because there is no simply way to sign extend * 8bit quantities in caller saved registers on x86 */ if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)|| (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) { g_assert (MONO_VARINFO (cfg, i)->reg == -1); g_assert (i == vmv->idx); vars = g_list_prepend (vars, vmv); } } vars = mono_varlist_sort (cfg, vars, 0); return vars; } GList * mono_arch_get_global_int_regs (MonoCompile *cfg) { GList *regs = NULL; /* we can use 3 registers for global allocation */ regs = g_list_prepend (regs, (gpointer)X86_EBX); regs = g_list_prepend (regs, (gpointer)X86_ESI); regs = g_list_prepend (regs, (gpointer)X86_EDI); return regs; } /* * mono_arch_regalloc_cost: * * Return the cost, in number of memory references, of the action of * allocating the variable VMV into a register during global register * allocation. */ guint32 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv) { MonoInst *ins = cfg->varinfo [vmv->idx]; if (cfg->method->save_lmf) /* The register is already saved */ return (ins->opcode == OP_ARG) ? 1 : 0; else /* push+pop+possible load if it is an argument */ return (ins->opcode == OP_ARG) ? 3 : 2; } /* * Set var information according to the calling convention. X86 version. 
* The locals var stuff should most likely be split in another method. */ void mono_arch_allocate_vars (MonoCompile *cfg) { MonoMethodSignature *sig; MonoMethodHeader *header; MonoInst *inst; guint32 locals_stack_size, locals_stack_align; int i, offset; gint32 *offsets; CallInfo *cinfo; header = mono_method_get_header (cfg->method); sig = mono_method_signature (cfg->method); cinfo = get_call_info (sig, FALSE); cfg->frame_reg = MONO_ARCH_BASEREG; offset = 0; /* Reserve space to save LMF and caller saved registers */ if (cfg->method->save_lmf) { offset += sizeof (MonoLMF); } else { if (cfg->used_int_regs & (1 << X86_EBX)) { offset += 4; } if (cfg->used_int_regs & (1 << X86_EDI)) { offset += 4; } if (cfg->used_int_regs & (1 << X86_ESI)) { offset += 4; } } switch (cinfo->ret.storage) { case ArgValuetypeInReg: /* Allocate a local to hold the result, the epilog will copy it to the correct place */ offset += 8; cfg->ret->opcode = OP_REGOFFSET; cfg->ret->inst_basereg = X86_EBP; cfg->ret->inst_offset = - offset; break; default: break; } /* Allocate locals */ offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align); if (locals_stack_align) { offset += (locals_stack_align - 1); offset &= ~(locals_stack_align - 1); } for (i = cfg->locals_start; i < cfg->num_varinfo; i++) { if (offsets [i] != -1) { MonoInst *inst = cfg->varinfo [i]; inst->opcode = OP_REGOFFSET; inst->inst_basereg = X86_EBP; inst->inst_offset = - (offset + offsets [i]); //printf ("allocated local %d to ", i); mono_print_tree_nl (inst); } } g_free (offsets); offset += locals_stack_size; /* * Allocate arguments+return value */ switch (cinfo->ret.storage) { case ArgOnStack: cfg->ret->opcode = OP_REGOFFSET; cfg->ret->inst_basereg = X86_EBP; cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET; break; case ArgValuetypeInReg: break; case ArgInIReg: cfg->ret->opcode = OP_REGVAR; cfg->ret->inst_c0 = cinfo->ret.reg; break; case ArgNone: case ArgOnFloatFpStack: case ArgOnDoubleFpStack: break; default: g_assert_not_reached (); } if (sig->call_convention == MONO_CALL_VARARG) { g_assert (cinfo->sig_cookie.storage == ArgOnStack); cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET; } for (i = 0; i < sig->param_count + sig->hasthis; ++i) { ArgInfo *ainfo = &cinfo->args [i]; inst = cfg->varinfo [i]; if (inst->opcode != OP_REGVAR) { inst->opcode = OP_REGOFFSET; inst->inst_basereg = X86_EBP; } inst->inst_offset = ainfo->offset + ARGS_OFFSET; } offset += (MONO_ARCH_FRAME_ALIGNMENT - 1); offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1); cfg->stack_offset = offset; g_free (cinfo); } void mono_arch_create_vars (MonoCompile *cfg) { MonoMethodSignature *sig; CallInfo *cinfo; sig = mono_method_signature (cfg->method); cinfo = get_call_info (sig, FALSE); if (cinfo->ret.storage == ArgValuetypeInReg) cfg->ret_var_is_local = TRUE; g_free (cinfo); } /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode, * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info */ /* * take the arguments and generate the arch-specific * instructions to properly call the function in call. * This includes pushing, moving arguments to the right register * etc. 
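 * Each argument becomes an OP_OUTARG (or OP_OUTARG_VT/OP_OUTARG_R4/OP_OUTARG_R8)
 * instruction prepended to call->out_args; since that list is emitted in
 * reverse, the arguments are pushed right to left, as the cdecl convention
 * expects.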
*/ MonoCallInst* mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) { MonoInst *arg, *in; MonoMethodSignature *sig; int i, n; CallInfo *cinfo; int sentinelpos; sig = call->signature; n = sig->param_count + sig->hasthis; cinfo = get_call_info (sig, FALSE); if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0); for (i = 0; i < n; ++i) { ArgInfo *ainfo = cinfo->args + i; /* Emit the signature cookie just before the implicit arguments */ if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) { MonoMethodSignature *tmp_sig; MonoInst *sig_arg; /* FIXME: Add support for signature tokens to AOT */ cfg->disable_aot = TRUE; MONO_INST_NEW (cfg, arg, OP_OUTARG); /* * mono_ArgIterator_Setup assumes the signature cookie is * passed first and all the arguments which were before it are * passed on the stack after the signature. So compensate by * passing a different signature. */ tmp_sig = mono_metadata_signature_dup (call->signature); tmp_sig->param_count -= call->signature->sentinelpos; tmp_sig->sentinelpos = 0; memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*)); MONO_INST_NEW (cfg, sig_arg, OP_ICONST); sig_arg->inst_p0 = tmp_sig; arg->inst_left = sig_arg; arg->type = STACK_PTR; /* prepend, so they get reversed */ arg->next = call->out_args; call->out_args = arg; } if (is_virtual && i == 0) { /* the argument will be attached to the call instrucion */ in = call->args [i]; } else { MonoType *t; if (i >= sig->hasthis) t = sig->params [i - sig->hasthis]; else t = &mono_defaults.int_class->byval_arg; t = mono_type_get_underlying_type (t); MONO_INST_NEW (cfg, arg, OP_OUTARG); in = call->args [i]; arg->cil_code = in->cil_code; arg->inst_left = in; arg->type = in->type; /* prepend, so they get reversed */ arg->next = call->out_args; call->out_args = arg; if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) { guint32 size, align; if (t->type == MONO_TYPE_TYPEDBYREF) { size = sizeof (MonoTypedRef); align = sizeof (gpointer); } else if (sig->pinvoke) size = mono_type_native_stack_size (&in->klass->byval_arg, &align); else { int ialign; size = mono_type_stack_size (&in->klass->byval_arg, &ialign); align = ialign; } arg->opcode = OP_OUTARG_VT; arg->klass = in->klass; arg->unused = sig->pinvoke; arg->inst_imm = size; } else { switch (ainfo->storage) { case ArgOnStack: arg->opcode = OP_OUTARG; if (!t->byref) { if (t->type == MONO_TYPE_R4) arg->opcode = OP_OUTARG_R4; else if (t->type == MONO_TYPE_R8) arg->opcode = OP_OUTARG_R8; } break; default: g_assert_not_reached (); } } } } if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) { if (cinfo->ret.storage == ArgValuetypeInReg) { MonoInst *zero_inst; /* * After the call, the struct is in registers, but needs to be saved to the memory pointed * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere * before calling the function. So we add a dummy instruction to represent pushing the * struct return address to the stack. The return address will be saved to this stack slot * by the code emitted in this_vret_args. 
*/ MONO_INST_NEW (cfg, arg, OP_OUTARG); MONO_INST_NEW (cfg, zero_inst, OP_ICONST); zero_inst->inst_p0 = 0; arg->inst_left = zero_inst; arg->type = STACK_PTR; /* prepend, so they get reversed */ arg->next = call->out_args; call->out_args = arg; } else /* if the function returns a struct, the called method already does a ret $0x4 */ if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) cinfo->stack_usage -= 4; } call->stack_usage = cinfo->stack_usage; #if defined(__APPLE__) if (cinfo->need_stack_align) { MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK); arg->inst_c0 = cinfo->stack_align_amount; arg->next = call->out_args; call->out_args = arg; } #endif g_free (cinfo); return call; } /* * Allow tracing to work with this interface (with an optional argument) */ void* mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments) { guchar *code = p; /* if some args are passed in registers, we need to save them here */ x86_push_reg (code, X86_EBP); if (cfg->compile_aot) { x86_push_imm (code, cfg->method); x86_mov_reg_imm (code, X86_EAX, func); x86_call_reg (code, X86_EAX); } else { mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method); x86_push_imm (code, cfg->method); mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func); x86_call_code (code, 0); } x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8); return code; } enum { SAVE_NONE, SAVE_STRUCT, SAVE_EAX, SAVE_EAX_EDX, SAVE_FP }; void* mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments) { guchar *code = p; int arg_size = 0, save_mode = SAVE_NONE; MonoMethod *method = cfg->method; switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) { case MONO_TYPE_VOID: /* special case string .ctor icall */ if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) save_mode = SAVE_EAX; else save_mode = SAVE_NONE; break; case MONO_TYPE_I8: case MONO_TYPE_U8: save_mode = SAVE_EAX_EDX; break; case MONO_TYPE_R4: case MONO_TYPE_R8: save_mode = SAVE_FP; break; case MONO_TYPE_GENERICINST: if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) { save_mode = SAVE_EAX; break; } /* Fall through */ case MONO_TYPE_VALUETYPE: save_mode = SAVE_STRUCT; break; default: save_mode = SAVE_EAX; break; } switch (save_mode) { case SAVE_EAX_EDX: x86_push_reg (code, X86_EDX); x86_push_reg (code, X86_EAX); if (enable_arguments) { x86_push_reg (code, X86_EDX); x86_push_reg (code, X86_EAX); arg_size = 8; } break; case SAVE_EAX: x86_push_reg (code, X86_EAX); if (enable_arguments) { x86_push_reg (code, X86_EAX); arg_size = 4; } break; case SAVE_FP: x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8); x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE); if (enable_arguments) { x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8); x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE); arg_size = 8; } break; case SAVE_STRUCT: if (enable_arguments) { x86_push_membase (code, X86_EBP, 8); arg_size = 4; } break; case SAVE_NONE: default: break; } if (cfg->compile_aot) { x86_push_imm (code, method); x86_mov_reg_imm (code, X86_EAX, func); x86_call_reg (code, X86_EAX); } else { mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method); x86_push_imm (code, method); mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func); x86_call_code (code, 0); } x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4); switch (save_mode) { case SAVE_EAX_EDX: x86_pop_reg (code, X86_EAX); x86_pop_reg 
(code, X86_EDX); break; case SAVE_EAX: x86_pop_reg (code, X86_EAX); break; case SAVE_FP: x86_fld_membase (code, X86_ESP, 0, TRUE); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8); break; case SAVE_NONE: default: break; } return code; } #define EMIT_COND_BRANCH(ins,cond,sign) \ if (ins->flags & MONO_INST_BRLABEL) { \ if (ins->inst_i0->inst_c0) { \ x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \ } else { \ mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \ if ((cfg->opt & MONO_OPT_BRANCH) && \ x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \ x86_branch8 (code, cond, 0, sign); \ else \ x86_branch32 (code, cond, 0, sign); \ } \ } else { \ if (ins->inst_true_bb->native_offset) { \ x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \ } else { \ mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \ if ((cfg->opt & MONO_OPT_BRANCH) && \ x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \ x86_branch8 (code, cond, 0, sign); \ else \ x86_branch32 (code, cond, 0, sign); \ } \ } /* * Emit an exception if condition is fail and * if possible do a directly branch to target */ #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \ do { \ MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \ if (tins == NULL) { \ mono_add_patch_info (cfg, code - cfg->native_code, \ MONO_PATCH_INFO_EXC, exc_name); \ x86_branch32 (code, cond, 0, signed); \ } else { \ EMIT_COND_BRANCH (tins, cond, signed); \ } \ } while (0); #define EMIT_FPCOMPARE(code) do { \ x86_fcompp (code); \ x86_fnstsw (code); \ } while (0); static guint8* emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data) { mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data); x86_call_code (code, 0); return code; } /* FIXME: Add more instructions */ #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG)) static void peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb) { MonoInst *ins, *last_ins = NULL; ins = bb->code; while (ins) { switch (ins->opcode) { case OP_ICONST: /* reg = 0 -> XOR (reg, reg) */ /* XOR sets cflags on x86, so we cant do it always */ if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) { ins->opcode = CEE_XOR; ins->sreg1 = ins->dreg; ins->sreg2 = ins->dreg; } break; case OP_MUL_IMM: /* remove unnecessary multiplication with 1 */ if (ins->inst_imm == 1) { if (ins->dreg != ins->sreg1) { ins->opcode = OP_MOVE; } else { last_ins->next = ins->next; ins = ins->next; continue; } } break; case OP_COMPARE_IMM: /* OP_COMPARE_IMM (reg, 0) * --> * OP_X86_TEST_NULL (reg) */ if (!ins->inst_imm) ins->opcode = OP_X86_TEST_NULL; break; case OP_X86_COMPARE_MEMBASE_IMM: /* * OP_STORE_MEMBASE_REG reg, offset(basereg) * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm * --> * OP_STORE_MEMBASE_REG reg, offset(basereg) * OP_COMPARE_IMM reg, imm * * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL */ if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { ins->opcode = OP_COMPARE_IMM; ins->sreg1 = last_ins->sreg1; /* check if we can remove cmp reg,0 with test null */ if (!ins->inst_imm) ins->opcode = OP_X86_TEST_NULL; } break; case OP_LOAD_MEMBASE: case OP_LOADI4_MEMBASE: /* * Note: if reg1 = reg2 the load op is removed * * 
OP_STORE_MEMBASE_REG reg1, offset(basereg) * OP_LOAD_MEMBASE offset(basereg), reg2 * --> * OP_STORE_MEMBASE_REG reg1, offset(basereg) * OP_MOVE reg1, reg2 */ if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG || last_ins->opcode == OP_STORE_MEMBASE_REG) && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { if (ins->dreg == last_ins->sreg1) { last_ins->next = ins->next; ins = ins->next; continue; } else { //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); ins->opcode = OP_MOVE; ins->sreg1 = last_ins->sreg1; } /* * Note: reg1 must be different from the basereg in the second load * Note: if reg1 = reg2 is equal then second load is removed * * OP_LOAD_MEMBASE offset(basereg), reg1 * OP_LOAD_MEMBASE offset(basereg), reg2 * --> * OP_LOAD_MEMBASE offset(basereg), reg1 * OP_MOVE reg1, reg2 */ } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE || last_ins->opcode == OP_LOAD_MEMBASE) && ins->inst_basereg != last_ins->dreg && ins->inst_basereg == last_ins->inst_basereg && ins->inst_offset == last_ins->inst_offset) { if (ins->dreg == last_ins->dreg) { last_ins->next = ins->next; ins = ins->next; continue; } else { ins->opcode = OP_MOVE; ins->sreg1 = last_ins->dreg; } //g_assert_not_reached (); #if 0 /* * OP_STORE_MEMBASE_IMM imm, offset(basereg) * OP_LOAD_MEMBASE offset(basereg), reg * --> * OP_STORE_MEMBASE_IMM imm, offset(basereg) * OP_ICONST reg, imm */ } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM || last_ins->opcode == OP_STORE_MEMBASE_IMM) && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); ins->opcode = OP_ICONST; ins->inst_c0 = last_ins->inst_imm; g_assert_not_reached (); // check this rule #endif } break; case OP_LOADU1_MEMBASE: case OP_LOADI1_MEMBASE: /* * OP_STORE_MEMBASE_REG reg1, offset(basereg) * OP_LOAD_MEMBASE offset(basereg), reg2 * --> * OP_STORE_MEMBASE_REG reg1, offset(basereg) * CONV_I2/U2 reg1, reg2 */ if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1; ins->sreg1 = last_ins->sreg1; } break; case OP_LOADU2_MEMBASE: case OP_LOADI2_MEMBASE: /* * OP_STORE_MEMBASE_REG reg1, offset(basereg) * OP_LOAD_MEMBASE offset(basereg), reg2 * --> * OP_STORE_MEMBASE_REG reg1, offset(basereg) * CONV_I2/U2 reg1, reg2 */ if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? 
CEE_CONV_I2 : CEE_CONV_U2; ins->sreg1 = last_ins->sreg1; } break; case CEE_CONV_I4: case CEE_CONV_U4: case OP_MOVE: /* * Removes: * * OP_MOVE reg, reg */ if (ins->dreg == ins->sreg1) { if (last_ins) last_ins->next = ins->next; ins = ins->next; continue; } /* * Removes: * * OP_MOVE sreg, dreg * OP_MOVE dreg, sreg */ if (last_ins && last_ins->opcode == OP_MOVE && ins->sreg1 == last_ins->dreg && ins->dreg == last_ins->sreg1) { last_ins->next = ins->next; ins = ins->next; continue; } break; case OP_X86_PUSH_MEMBASE: if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG || last_ins->opcode == OP_STORE_MEMBASE_REG) && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { ins->opcode = OP_X86_PUSH; ins->sreg1 = last_ins->sreg1; } break; } last_ins = ins; ins = ins->next; } bb->last_ins = last_ins; } static const int branch_cc_table [] = { X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT, X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT, X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC }; static const char*const * ins_spec = pentium_desc; /*#include "cprop.c"*/ void mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb) { mono_local_regalloc (cfg, bb); } static unsigned char* emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed) { x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4); x86_fnstcw_membase(code, X86_ESP, 0); x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2); x86_alu_reg_imm (code, X86_OR, dreg, 0xc00); x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2); x86_fldcw_membase (code, X86_ESP, 2); if (size == 8) { x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8); x86_fist_pop_membase (code, X86_ESP, 0, TRUE); x86_pop_reg (code, dreg); /* FIXME: need the high register * x86_pop_reg (code, dreg_high); */ } else { x86_push_reg (code, X86_EAX); // SP = SP - 4 x86_fist_pop_membase (code, X86_ESP, 0, FALSE); x86_pop_reg (code, dreg); } x86_fldcw_membase (code, X86_ESP, 0); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4); if (size == 1) x86_widen_reg (code, dreg, dreg, is_signed, FALSE); else if (size == 2) x86_widen_reg (code, dreg, dreg, is_signed, TRUE); return code; } static unsigned char* mono_emit_stack_alloc (guchar *code, MonoInst* tree) { int sreg = tree->sreg1; int need_touch = FALSE; #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK) need_touch = TRUE; #endif if (need_touch) { guint8* br[5]; /* * Under Windows: * If requested stack size is larger than one page, * perform stack-touch operation */ /* * Generate stack probe code. * Under Windows, it is necessary to allocate one page at a time, * "touching" stack after each successful sub-allocation. This is * because of the way stack growth is implemented - there is a * guard page before the lowest stack page that is currently commited. * Stack normally grows sequentially so OS traps access to the * guard page and commits more pages when needed. */ x86_test_reg_imm (code, sreg, ~0xFFF); br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE); br[2] = code; /* loop */ x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000); x86_test_membase_reg (code, X86_ESP, 0, X86_ESP); /* * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine * that follows only initializes the last part of the area. 
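 * The "test" against the memory at the new ESP is what actually touches the
 * freshly exposed page, so the OS commits it before the next 0x1000 byte
 * sub-allocation.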
*/ /* Same as the init code below with size==0x1000 */ if (tree->flags & MONO_INST_INIT) { x86_push_reg (code, X86_EAX); x86_push_reg (code, X86_ECX); x86_push_reg (code, X86_EDI); x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2)); x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX); x86_lea_membase (code, X86_EDI, X86_ESP, 12); x86_cld (code); x86_prefix (code, X86_REP_PREFIX); x86_stosl (code); x86_pop_reg (code, X86_EDI); x86_pop_reg (code, X86_ECX); x86_pop_reg (code, X86_EAX); } x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000); x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000); br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE); x86_patch (br[3], br[2]); x86_test_reg_reg (code, sreg, sreg); br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE); x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg); br[1] = code; x86_jump8 (code, 0); x86_patch (br[0], code); x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg); x86_patch (br[1], code); x86_patch (br[4], code); } else x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1); if (tree->flags & MONO_INST_INIT) { int offset = 0; if (tree->dreg != X86_EAX && sreg != X86_EAX) { x86_push_reg (code, X86_EAX); offset += 4; } if (tree->dreg != X86_ECX && sreg != X86_ECX) { x86_push_reg (code, X86_ECX); offset += 4; } if (tree->dreg != X86_EDI && sreg != X86_EDI) { x86_push_reg (code, X86_EDI); offset += 4; } x86_shift_reg_imm (code, X86_SHR, sreg, 2); if (sreg != X86_ECX) x86_mov_reg_reg (code, X86_ECX, sreg, 4); x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX); x86_lea_membase (code, X86_EDI, X86_ESP, offset); x86_cld (code); x86_prefix (code, X86_REP_PREFIX); x86_stosl (code); if (tree->dreg != X86_EDI && sreg != X86_EDI) x86_pop_reg (code, X86_EDI); if (tree->dreg != X86_ECX && sreg != X86_ECX) x86_pop_reg (code, X86_ECX); if (tree->dreg != X86_EAX && sreg != X86_EAX) x86_pop_reg (code, X86_EAX); } return code; } static guint8* emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) { CallInfo *cinfo; int quad; /* Move return value to the target register */ switch (ins->opcode) { case CEE_CALL: case OP_CALL_REG: case OP_CALL_MEMBASE: if (ins->dreg != X86_EAX) x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4); break; case OP_VCALL: case OP_VCALL_REG: case OP_VCALL_MEMBASE: cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE); if (cinfo->ret.storage == ArgValuetypeInReg) { /* Pop the destination address from the stack */ x86_pop_reg (code, X86_ECX); for (quad = 0; quad < 2; quad ++) { switch (cinfo->ret.pair_storage [quad]) { case ArgInIReg: g_assert (cinfo->ret.pair_regs [quad] != X86_ECX); x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer)); break; case ArgNone: break; default: g_assert_not_reached (); } } } g_free (cinfo); default: break; } return code; } /* * emit_tls_get: * @code: buffer to store code to * @dreg: hard register where to place the result * @tls_offset: offset info * * emit_tls_get emits in @code the native code that puts in the dreg register * the item in the thread local storage identified by tls_offset. * * Returns: a pointer to the end of the stored code */ static guint8* emit_tls_get (guint8* code, int dreg, int tls_offset) { #ifdef PLATFORM_WIN32 /* * See the Under the Hood article in the May 1996 issue of Microsoft Systems * Journal and/or a disassembly of the TlsGet () function. 
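 * On Win32, fs:[0x18] holds the linear address of the current thread's TEB,
 * and the 64-entry TlsSlots array lives at offset 0xE10 (3600) within the
 * TEB, which is why the code below asserts tls_offset < 64.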
*/ g_assert (tls_offset < 64); x86_prefix (code, X86_FS_PREFIX); x86_mov_reg_mem (code, dreg, 0x18, 4); /* Dunno what this does but TlsGetValue () contains it */ x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0); x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4); #else if (optimize_for_xen) { x86_prefix (code, X86_GS_PREFIX); x86_mov_reg_mem (code, dreg, 0, 4); x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4); } else { x86_prefix (code, X86_GS_PREFIX); x86_mov_reg_mem (code, dreg, tls_offset, 4); } #endif return code; } #define REAL_PRINT_REG(text,reg) \ mono_assert (reg >= 0); \ x86_push_reg (code, X86_EAX); \ x86_push_reg (code, X86_EDX); \ x86_push_reg (code, X86_ECX); \ x86_push_reg (code, reg); \ x86_push_imm (code, reg); \ x86_push_imm (code, text " %d %p\n"); \ x86_mov_reg_imm (code, X86_EAX, printf); \ x86_call_reg (code, X86_EAX); \ x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \ x86_pop_reg (code, X86_ECX); \ x86_pop_reg (code, X86_EDX); \ x86_pop_reg (code, X86_EAX); /* benchmark and set based on cpu */ #define LOOP_ALIGNMENT 8 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting) void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { MonoInst *ins; MonoCallInst *call; guint offset; guint8 *code = cfg->native_code + cfg->code_len; MonoInst *last_ins = NULL; guint last_offset = 0; int max_len, cpos; if (cfg->opt & MONO_OPT_PEEPHOLE) peephole_pass (cfg, bb); if (cfg->opt & MONO_OPT_LOOP) { int pad, align = LOOP_ALIGNMENT; /* set alignment depending on cpu */ if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) { pad = align - pad; /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/ x86_padding (code, pad); cfg->code_len += pad; bb->native_offset = cfg->code_len; } } if (cfg->verbose_level > 2) g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset); cpos = bb->max_offset; if (cfg->prof_options & MONO_PROFILE_COVERAGE) { MonoProfileCoverageInfo *cov = cfg->coverage_info; g_assert (!cfg->compile_aot); cpos += 6; cov->data [bb->dfn].cil_code = bb->cil_code; /* this is not thread save, but good enough */ x86_inc_mem (code, &cov->data [bb->dfn].count); } offset = code - cfg->native_code; mono_debug_open_block (cfg, bb, offset); ins = bb->code; while (ins) { offset = code - cfg->native_code; max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN]; if (offset > (cfg->code_size - max_len - 16)) { cfg->code_size *= 2; cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); code = cfg->native_code + offset; mono_jit_stats.code_reallocs++; } mono_debug_record_line_number (cfg, ins, offset); switch (ins->opcode) { case OP_BIGMUL: x86_mul_reg (code, ins->sreg2, TRUE); break; case OP_BIGMUL_UN: x86_mul_reg (code, ins->sreg2, FALSE); break; case OP_X86_SETEQ_MEMBASE: case OP_X86_SETNE_MEMBASE: x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? 
X86_CC_EQ : X86_CC_NE, ins->inst_basereg, ins->inst_offset, TRUE); break; case OP_STOREI1_MEMBASE_IMM: x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1); break; case OP_STOREI2_MEMBASE_IMM: x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2); break; case OP_STORE_MEMBASE_IMM: case OP_STOREI4_MEMBASE_IMM: x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4); break; case OP_STOREI1_MEMBASE_REG: x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1); break; case OP_STOREI2_MEMBASE_REG: x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2); break; case OP_STORE_MEMBASE_REG: case OP_STOREI4_MEMBASE_REG: x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4); break; case CEE_LDIND_I: case CEE_LDIND_I4: case CEE_LDIND_U4: x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4); break; case OP_LOADU4_MEM: x86_mov_reg_imm (code, ins->dreg, ins->inst_p0); x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4); break; case OP_LOAD_MEMBASE: case OP_LOADI4_MEMBASE: case OP_LOADU4_MEMBASE: x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4); break; case OP_LOADU1_MEMBASE: x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE); break; case OP_LOADI1_MEMBASE: x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE); break; case OP_LOADU2_MEMBASE: x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE); break; case OP_LOADI2_MEMBASE: x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE); break; case CEE_CONV_I1: x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE); break; case CEE_CONV_I2: x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE); break; case CEE_CONV_U1: x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE); break; case CEE_CONV_U2: x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE); break; case OP_COMPARE: x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2); break; case OP_COMPARE_IMM: x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm); break; case OP_X86_COMPARE_MEMBASE_REG: x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2); break; case OP_X86_COMPARE_MEMBASE_IMM: x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_COMPARE_MEMBASE8_IMM: x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_COMPARE_REG_MEMBASE: x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset); break; case OP_X86_COMPARE_MEM_IMM: x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm); break; case OP_X86_TEST_NULL: x86_test_reg_reg (code, ins->sreg1, ins->sreg1); break; case OP_X86_ADD_MEMBASE_IMM: x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_ADD_MEMBASE: x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset); break; case OP_X86_SUB_MEMBASE_IMM: x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_SUB_MEMBASE: x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset); break; case OP_X86_AND_MEMBASE_IMM: x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_OR_MEMBASE_IMM: 
x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_XOR_MEMBASE_IMM: x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm); break; case OP_X86_INC_MEMBASE: x86_inc_membase (code, ins->inst_basereg, ins->inst_offset); break; case OP_X86_INC_REG: x86_inc_reg (code, ins->dreg); break; case OP_X86_DEC_MEMBASE: x86_dec_membase (code, ins->inst_basereg, ins->inst_offset); break; case OP_X86_DEC_REG: x86_dec_reg (code, ins->dreg); break; case OP_X86_MUL_MEMBASE: x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset); break; case CEE_BREAK: x86_breakpoint (code); break; case OP_ADDCC: case CEE_ADD: x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2); break; case OP_ADC: x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2); break; case OP_ADDCC_IMM: case OP_ADD_IMM: x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm); break; case OP_ADC_IMM: x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm); break; case OP_SUBCC: case CEE_SUB: x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2); break; case OP_SBB: x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2); break; case OP_SUBCC_IMM: case OP_SUB_IMM: x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm); break; case OP_SBB_IMM: x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm); break; case CEE_AND: x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2); break; case OP_AND_IMM: x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm); break; case CEE_DIV: x86_cdq (code); x86_div_reg (code, ins->sreg2, TRUE); break; case CEE_DIV_UN: x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX); x86_div_reg (code, ins->sreg2, FALSE); break; case OP_DIV_IMM: x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm); x86_cdq (code); x86_div_reg (code, ins->sreg2, TRUE); break; case CEE_REM: x86_cdq (code); x86_div_reg (code, ins->sreg2, TRUE); break; case CEE_REM_UN: x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX); x86_div_reg (code, ins->sreg2, FALSE); break; case OP_REM_IMM: x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm); x86_cdq (code); x86_div_reg (code, ins->sreg2, TRUE); break; case CEE_OR: x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2); break; case OP_OR_IMM: x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm); break; case CEE_XOR: x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2); break; case OP_XOR_IMM: x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm); break; case CEE_SHL: g_assert (ins->sreg2 == X86_ECX); x86_shift_reg (code, X86_SHL, ins->dreg); break; case CEE_SHR: g_assert (ins->sreg2 == X86_ECX); x86_shift_reg (code, X86_SAR, ins->dreg); break; case OP_SHR_IMM: x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm); break; case OP_SHR_UN_IMM: x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm); break; case CEE_SHR_UN: g_assert (ins->sreg2 == X86_ECX); x86_shift_reg (code, X86_SHR, ins->dreg); break; case OP_SHL_IMM: x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm); break; case OP_LSHL: { guint8 *jump_to_end; /* handle shifts below 32 bits */ x86_shld_reg (code, ins->unused, ins->sreg1); x86_shift_reg (code, X86_SHL, ins->sreg1); x86_test_reg_imm (code, X86_ECX, 32); jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE); /* handle shift over 32 bit */ x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4); x86_clear_reg (code, ins->sreg1); x86_patch (jump_to_end, code); } break; case OP_LSHR: { guint8 *jump_to_end; /* handle shifts below 32 bits */ x86_shrd_reg 
(code, ins->sreg1, ins->unused); x86_shift_reg (code, X86_SAR, ins->unused); x86_test_reg_imm (code, X86_ECX, 32); jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE); /* handle shifts over 31 bits */ x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4); x86_shift_reg_imm (code, X86_SAR, ins->unused, 31); x86_patch (jump_to_end, code); } break; case OP_LSHR_UN: { guint8 *jump_to_end; /* handle shifts below 32 bits */ x86_shrd_reg (code, ins->sreg1, ins->unused); x86_shift_reg (code, X86_SHR, ins->unused); x86_test_reg_imm (code, X86_ECX, 32); jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE); /* handle shifts over 31 bits */ x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4); x86_shift_reg_imm (code, X86_SHR, ins->unused, 31); x86_patch (jump_to_end, code); } break; case OP_LSHL_IMM: if (ins->inst_imm >= 32) { x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4); x86_clear_reg (code, ins->sreg1); x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32); } else { x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm); x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm); } break; case OP_LSHR_IMM: if (ins->inst_imm >= 32) { x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4); x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f); x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32); } else { x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm); x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm); } break; case OP_LSHR_UN_IMM: if (ins->inst_imm >= 32) { x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4); x86_clear_reg (code, ins->unused); x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32); } else { x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm); x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm); } break; case CEE_NOT: x86_not_reg (code, ins->sreg1); break; case CEE_NEG: x86_neg_reg (code, ins->sreg1); break; case OP_SEXT_I1: x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE); break; case OP_SEXT_I2: x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE); break; case CEE_MUL: x86_imul_reg_reg (code, ins->sreg1, ins->sreg2); break; case OP_MUL_IMM: switch (ins->inst_imm) { case 2: /* MOV r1, r2 */ /* ADD r1, r1 */ if (ins->dreg != ins->sreg1) x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4); x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg); break; case 3: /* LEA r1, [r2 + r2*2] */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1); break; case 5: /* LEA r1, [r2 + r2*4] */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2); break; case 6: /* LEA r1, [r2 + r2*2] */ /* ADD r1, r1 */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1); x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg); break; case 9: /* LEA r1, [r2 + r2*8] */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3); break; case 10: /* LEA r1, [r2 + r2*4] */ /* ADD r1, r1 */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2); x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg); break; case 12: /* LEA r1, [r2 + r2*2] */ /* SHL r1, 2 */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1); x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2); break; case 25: /* LEA r1, [r2 + r2*4] */ /* LEA r1, [r1 + r1*4] */ x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2); x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2); break; case 100: /* LEA r1, [r2 + r2*4] */ /* SHL r1, 2 */ /* LEA r1, [r1 + r1*4] */ 
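/* i.e. r2 * 5, then * 4, then * 5 == r2 * 100 */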
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2); x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2); x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2); break; default: x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm); break; } break; case CEE_MUL_OVF: x86_imul_reg_reg (code, ins->sreg1, ins->sreg2); EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException"); break; case CEE_MUL_OVF_UN: { /* the mul operation and the exception check should most likely be split */ int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE; /*g_assert (ins->sreg2 == X86_EAX); g_assert (ins->dreg == X86_EAX);*/ if (ins->sreg2 == X86_EAX) { non_eax_reg = ins->sreg1; } else if (ins->sreg1 == X86_EAX) { non_eax_reg = ins->sreg2; } else { /* no need to save since we're going to store to it anyway */ if (ins->dreg != X86_EAX) { saved_eax = TRUE; x86_push_reg (code, X86_EAX); } x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4); non_eax_reg = ins->sreg2; } if (ins->dreg == X86_EDX) { if (!saved_eax) { saved_eax = TRUE; x86_push_reg (code, X86_EAX); } } else if (ins->dreg != X86_EAX) { saved_edx = TRUE; x86_push_reg (code, X86_EDX); } x86_mul_reg (code, non_eax_reg, FALSE); /* save before the check since pop and mov don't change the flags */ if (ins->dreg != X86_EAX) x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4); if (saved_edx) x86_pop_reg (code, X86_EDX); if (saved_eax) x86_pop_reg (code, X86_EAX); EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException"); break; } case OP_ICONST: x86_mov_reg_imm (code, ins->dreg, ins->inst_c0); break; case OP_AOTCONST: g_assert_not_reached (); mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); x86_mov_reg_imm (code, ins->dreg, 0); break; case OP_LOAD_GOTADDR: x86_call_imm (code, 0); /* * The patch needs to point to the pop, since the GOT offset needs * to be added to that address. */ mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL); x86_pop_reg (code, ins->dreg); x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0); break; case OP_GOT_ENTRY: mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0); x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4); break; case OP_X86_PUSH_GOT_ENTRY: mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0); x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0); break; case CEE_CONV_I4: case OP_MOVE: x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4); break; case CEE_CONV_U4: g_assert_not_reached (); case CEE_JMP: { /* * Note: this 'frame destruction' logic is useful for tail calls, too. * Keep in sync with the code in emit_epilog. */ int pos = 0; /* FIXME: no tracing support... 
*/ if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE) code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE); /* reset offset to make max_len work */ offset = code - cfg->native_code; g_assert (!cfg->method->save_lmf); if (cfg->used_int_regs & (1 << X86_EBX)) pos -= 4; if (cfg->used_int_regs & (1 << X86_EDI)) pos -= 4; if (cfg->used_int_regs & (1 << X86_ESI)) pos -= 4; if (pos) x86_lea_membase (code, X86_ESP, X86_EBP, pos); if (cfg->used_int_regs & (1 << X86_ESI)) x86_pop_reg (code, X86_ESI); if (cfg->used_int_regs & (1 << X86_EDI)) x86_pop_reg (code, X86_EDI); if (cfg->used_int_regs & (1 << X86_EBX)) x86_pop_reg (code, X86_EBX); /* restore ESP/EBP */ x86_leave (code); offset = code - cfg->native_code; mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0); x86_jump32 (code, 0); break; } case OP_CHECK_THIS: /* ensure ins->sreg1 is not NULL * note that cmp DWORD PTR [eax], eax is one byte shorter than * cmp DWORD PTR [eax], 0 */ x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1); break; case OP_ARGLIST: { int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX; x86_push_reg (code, hreg); x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie); x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4); x86_pop_reg (code, hreg); break; } case OP_FCALL: case OP_LCALL: case OP_VCALL: case OP_VOIDCALL: case CEE_CALL: call = (MonoCallInst*)ins; if (ins->flags & MONO_INST_HAS_METHOD) code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method); else code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr); if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) { /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8 * bytes to pop, we want to use pops. GCC does this (note it won't happen * for P4 or i686 because gcc will avoid using pop push at all. But we aren't * smart enough to do that optimization yet * * It turns out that on my P4, doing two pops for 8 bytes on the stack makes * mcs botstrap slow down. However, doing 1 pop for 4 bytes creates a small, * (most likely from locality benefits). People with other processors should * check on theirs to see what happens. */ if (call->stack_usage == 4) { /* we want to use registers that won't get used soon, so use * ecx, as eax will get allocated first. edx is used by long calls, * so we can't use that. 
*/ x86_pop_reg (code, X86_ECX); } else { x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage); } } code = emit_move_return_value (cfg, ins, code); break; case OP_FCALL_REG: case OP_LCALL_REG: case OP_VCALL_REG: case OP_VOIDCALL_REG: case OP_CALL_REG: call = (MonoCallInst*)ins; x86_call_reg (code, ins->sreg1); if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) { if (call->stack_usage == 4) x86_pop_reg (code, X86_ECX); else x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage); } code = emit_move_return_value (cfg, ins, code); break; case OP_FCALL_MEMBASE: case OP_LCALL_MEMBASE: case OP_VCALL_MEMBASE: case OP_VOIDCALL_MEMBASE: case OP_CALL_MEMBASE: call = (MonoCallInst*)ins; x86_call_membase (code, ins->sreg1, ins->inst_offset); if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) { if (call->stack_usage == 4) x86_pop_reg (code, X86_ECX); else x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage); } code = emit_move_return_value (cfg, ins, code); break; case OP_OUTARG: case OP_X86_PUSH: x86_push_reg (code, ins->sreg1); break; case OP_X86_PUSH_IMM: x86_push_imm (code, ins->inst_imm); break; case OP_X86_PUSH_MEMBASE: x86_push_membase (code, ins->inst_basereg, ins->inst_offset); break; case OP_X86_PUSH_OBJ: x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm); x86_push_reg (code, X86_EDI); x86_push_reg (code, X86_ESI); x86_push_reg (code, X86_ECX); if (ins->inst_offset) x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset); else x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4); x86_lea_membase (code, X86_EDI, X86_ESP, 12); x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2)); x86_cld (code); x86_prefix (code, X86_REP_PREFIX); x86_movsd (code); x86_pop_reg (code, X86_ECX); x86_pop_reg (code, X86_ESI); x86_pop_reg (code, X86_EDI); break; case OP_X86_LEA: x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused); break; case OP_X86_LEA_MEMBASE: x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm); break; case OP_X86_XCHG: x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4); break; case OP_LOCALLOC: /* keep alignment */ x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1); x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1)); code = mono_emit_stack_alloc (code, ins); x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4); break; case CEE_RET: x86_ret (code); break; case CEE_THROW: { x86_push_reg (code, ins->sreg1); code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_arch_throw_exception"); break; } case OP_RETHROW: { x86_push_reg (code, ins->sreg1); code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_arch_rethrow_exception"); break; } case OP_CALL_HANDLER: /* Align stack */ #ifdef __APPLE__ x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12); #endif mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb); x86_call_imm (code, 0); #ifdef __APPLE__ x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12); #endif break; case OP_LABEL: ins->inst_c0 = code - cfg->native_code; break; case CEE_BR: //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins); //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins) //break; if (ins->flags & MONO_INST_BRLABEL) { if (ins->inst_i0->inst_c0) { x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0); } else { mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0); if 
((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) x86_jump8 (code, 0); else x86_jump32 (code, 0); } } else { if (ins->inst_target_bb->native_offset) { x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); } else { mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb); if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (ins->inst_target_bb->max_offset - cpos)) x86_jump8 (code, 0); else x86_jump32 (code, 0); } } break; case OP_BR_REG: x86_jump_reg (code, ins->sreg1); break; case OP_CEQ: x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); break; case OP_CLT: x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); break; case OP_CLT_UN: x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); break; case OP_CGT: x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); break; case OP_CGT_UN: x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); break; case OP_CNE: x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); break; case OP_COND_EXC_EQ: case OP_COND_EXC_NE_UN: case OP_COND_EXC_LT: case OP_COND_EXC_LT_UN: case OP_COND_EXC_GT: case OP_COND_EXC_GT_UN: case OP_COND_EXC_GE: case OP_COND_EXC_GE_UN: case OP_COND_EXC_LE: case OP_COND_EXC_LE_UN: case OP_COND_EXC_OV: case OP_COND_EXC_NO: case OP_COND_EXC_C: case OP_COND_EXC_NC: EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1); break; case CEE_BEQ: case CEE_BNE_UN: case CEE_BLT: case CEE_BLT_UN: case CEE_BGT: case CEE_BGT_UN: case CEE_BGE: case CEE_BGE_UN: case CEE_BLE: case CEE_BLE_UN: EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN)); break; /* floating point opcodes */ case OP_R8CONST: { double d = *(double *)ins->inst_p0; if ((d == 0.0) && (mono_signbit (d) == 0)) { x86_fldz (code); } else if (d == 1.0) { x86_fld1 (code); } else { if (cfg->compile_aot) { guint32 *val = (guint32*)&d; x86_push_imm (code, val [1]); x86_push_imm (code, val [0]); x86_fld_membase (code, X86_ESP, 0, TRUE); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8); } else { mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0); x86_fld (code, NULL, TRUE); } } break; } case OP_R4CONST: { float f = *(float *)ins->inst_p0; if ((f == 0.0) && (mono_signbit (f) == 0)) { x86_fldz (code); } else if (f == 1.0) { x86_fld1 (code); } else { if (cfg->compile_aot) { guint32 val = *(guint32*)&f; x86_push_imm (code, val); x86_fld_membase (code, X86_ESP, 0, FALSE); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4); } else { mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0); x86_fld (code, NULL, FALSE); } } break; } case OP_STORER8_MEMBASE_REG: x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE); break; case OP_LOADR8_SPILL_MEMBASE: x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE); x86_fxch (code, 1); break; case OP_LOADR8_MEMBASE: x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE); break; case OP_STORER4_MEMBASE_REG: x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE); break; case OP_LOADR4_MEMBASE: x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE); break; 
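/*
 * Editorial sketch (added note, not original text): the int -> FP conversions
 * below have to go through memory, since there is no direct GPR -> x87 move.
 * For CEE_CONV_R8 the emitted sequence is roughly:
 *   push %sreg1      ; spill the integer to the stack
 *   fildl (%esp)     ; load it onto the x87 stack as a 32-bit integer
 *   add  $4, %esp    ; release the temporary slot
 * The x86_fild_membase ()/x86_fld* () emitters are presumably the macros from
 * the shared x86 code generation header (x86-codegen.h).
 */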
case CEE_CONV_R4: /* FIXME: change precision */ case CEE_CONV_R8: x86_push_reg (code, ins->sreg1); x86_fild_membase (code, X86_ESP, 0, FALSE); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4); break; case OP_X86_FP_LOAD_I8: x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE); break; case OP_X86_FP_LOAD_I4: x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE); break; case OP_FCONV_TO_I1: code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE); break; case OP_FCONV_TO_U1: code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE); break; case OP_FCONV_TO_I2: code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE); break; case OP_FCONV_TO_U2: code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE); break; case OP_FCONV_TO_I4: case OP_FCONV_TO_I: code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE); break; case OP_FCONV_TO_I8: x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4); x86_fnstcw_membase(code, X86_ESP, 0); x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2); x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00); x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2); x86_fldcw_membase (code, X86_ESP, 2); x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8); x86_fist_pop_membase (code, X86_ESP, 0, TRUE); x86_pop_reg (code, ins->dreg); x86_pop_reg (code, ins->unused); x86_fldcw_membase (code, X86_ESP, 0); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4); break; case OP_LCONV_TO_R_UN: { static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 }; guint8 *br; /* load 64bit integer to FP stack */ x86_push_imm (code, 0); x86_push_reg (code, ins->sreg2); x86_push_reg (code, ins->sreg1); x86_fild_membase (code, X86_ESP, 0, TRUE); /* store as 80bit FP value */ x86_fst80_membase (code, X86_ESP, 0); /* test if lreg is negative */ x86_test_reg_reg (code, ins->sreg2, ins->sreg2); br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE); /* add correction constant mn */ x86_fld80_mem (code, mn); x86_fld80_membase (code, X86_ESP, 0); x86_fp_op_reg (code, X86_FADD, 1, TRUE); x86_fst80_membase (code, X86_ESP, 0); x86_patch (br, code); x86_fld80_membase (code, X86_ESP, 0); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12); break; } case OP_LCONV_TO_OVF_I: { guint8 *br [3], *label [1]; MonoInst *tins; /* * Valid ints: 0xffffffff:80000000 to 00000000:7fffffff */ x86_test_reg_reg (code, ins->sreg1, ins->sreg1); /* If the low word top bit is set, see if we are negative */ br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE); /* We are not negative (no top bit set), check that our top word is zero */ x86_test_reg_reg (code, ins->sreg2, ins->sreg2); br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE); label [0] = code; /* throw exception */ tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException"); if (tins) { mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb); if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos)) x86_jump8 (code, 0); else x86_jump32 (code, 0); } else { mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException"); x86_jump32 (code, 0); } x86_patch (br [0], code); /* our top bit is set, check that top word is 0xffffffff */ x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff); x86_patch (br [1], code); /* nope, emit exception */ br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE); x86_patch (br [2], label [0]); if (ins->dreg != ins->sreg1) x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4); break; } case OP_FADD: x86_fp_op_reg (code, X86_FADD, 1,
TRUE); break; case OP_FSUB: x86_fp_op_reg (code, X86_FSUB, 1, TRUE); break; case OP_FMUL: x86_fp_op_reg (code, X86_FMUL, 1, TRUE); break; case OP_FDIV: x86_fp_op_reg (code, X86_FDIV, 1, TRUE); break; case OP_FNEG: x86_fchs (code); break; case OP_SIN: x86_fsin (code); x86_fldz (code); x86_fp_op_reg (code, X86_FADD, 1, TRUE); break; case OP_COS: x86_fcos (code); x86_fldz (code); x86_fp_op_reg (code, X86_FADD, 1, TRUE); break; case OP_ABS: x86_fabs (code); break; case OP_TAN: { /* * it really doesn't make sense to inline all this code, * it's here just to show that things may not be as simple * as they appear. */ guchar *check_pos, *end_tan, *pop_jump; x86_push_reg (code, X86_EAX); x86_fptan (code); x86_fnstsw (code); x86_test_reg_imm (code, X86_EAX, X86_FP_C2); check_pos = code; x86_branch8 (code, X86_CC_NE, 0, FALSE); x86_fstp (code, 0); /* pop the 1.0 */ end_tan = code; x86_jump8 (code, 0); x86_fldpi (code); x86_fp_op (code, X86_FADD, 0); x86_fxch (code, 1); x86_fprem1 (code); x86_fstsw (code); x86_test_reg_imm (code, X86_EAX, X86_FP_C2); pop_jump = code; x86_branch8 (code, X86_CC_NE, 0, FALSE); x86_fstp (code, 1); x86_fptan (code); x86_patch (pop_jump, code); x86_fstp (code, 0); /* pop the 1.0 */ x86_patch (check_pos, code); x86_patch (end_tan, code); x86_fldz (code); x86_fp_op_reg (code, X86_FADD, 1, TRUE); x86_pop_reg (code, X86_EAX); break; } case OP_ATAN: x86_fld1 (code); x86_fpatan (code); x86_fldz (code); x86_fp_op_reg (code, X86_FADD, 1, TRUE); break; case OP_SQRT: x86_fsqrt (code); break; case OP_X86_FPOP: x86_fstp (code, 0); break; case OP_FREM: { guint8 *l1, *l2; x86_push_reg (code, X86_EAX); /* we need to exchange ST(0) with ST(1) */ x86_fxch (code, 1); /* this requires a loop, because fprem somtimes * returns a partial remainder */ l1 = code; /* looks like MS is using fprem instead of the IEEE compatible fprem1 */ /* x86_fprem1 (code); */ x86_fprem (code); x86_fnstsw (code); x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2); l2 = code + 2; x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE); /* pop result */ x86_fstp (code, 1); x86_pop_reg (code, X86_EAX); break; } case OP_FCOMPARE: if (cfg->opt & MONO_OPT_FCMOV) { x86_fcomip (code, 1); x86_fstp (code, 0); break; } /* this overwrites EAX */ EMIT_FPCOMPARE(code); x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK); break; case OP_FCEQ: if (cfg->opt & MONO_OPT_FCMOV) { /* zeroing the register at the start results in * shorter and faster code (we can also remove the widening op) */ guchar *unordered_check; x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); x86_fcomip (code, 1); x86_fstp (code, 0); unordered_check = code; x86_branch8 (code, X86_CC_P, 0, FALSE); x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE); x86_patch (unordered_check, code); break; } if (ins->dreg != X86_EAX) x86_push_reg (code, X86_EAX); EMIT_FPCOMPARE(code); x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK); x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000); x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); if (ins->dreg != X86_EAX) x86_pop_reg (code, X86_EAX); break; case OP_FCLT: case OP_FCLT_UN: if (cfg->opt & MONO_OPT_FCMOV) { /* zeroing the register at the start results in * shorter and faster code (we can also remove the widening op) */ x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); x86_fcomip (code, 1); x86_fstp (code, 0); if (ins->opcode == OP_FCLT_UN) { guchar *unordered_check = code; guchar *jump_to_end; x86_branch8 (code, X86_CC_P, 0, FALSE); x86_set_reg (code, X86_CC_GT, 
ins->dreg, FALSE); jump_to_end = code; x86_jump8 (code, 0); x86_patch (unordered_check, code); x86_inc_reg (code, ins->dreg); x86_patch (jump_to_end, code); } else { x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE); } break; } if (ins->dreg != X86_EAX) x86_push_reg (code, X86_EAX); EMIT_FPCOMPARE(code); x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK); if (ins->opcode == OP_FCLT_UN) { guchar *is_not_zero_check, *end_jump; is_not_zero_check = code; x86_branch8 (code, X86_CC_NZ, 0, TRUE); end_jump = code; x86_jump8 (code, 0); x86_patch (is_not_zero_check, code); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK); x86_patch (end_jump, code); } x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); if (ins->dreg != X86_EAX) x86_pop_reg (code, X86_EAX); break; case OP_FCGT: case OP_FCGT_UN: if (cfg->opt & MONO_OPT_FCMOV) { /* zeroing the register at the start results in * shorter and faster code (we can also remove the widening op) */ guchar *unordered_check; x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg); x86_fcomip (code, 1); x86_fstp (code, 0); if (ins->opcode == OP_FCGT) { unordered_check = code; x86_branch8 (code, X86_CC_P, 0, FALSE); x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE); x86_patch (unordered_check, code); } else { x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE); } break; } if (ins->dreg != X86_EAX) x86_push_reg (code, X86_EAX); EMIT_FPCOMPARE(code); x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0); if (ins->opcode == OP_FCGT_UN) { guchar *is_not_zero_check, *end_jump; is_not_zero_check = code; x86_branch8 (code, X86_CC_NZ, 0, TRUE); end_jump = code; x86_jump8 (code, 0); x86_patch (is_not_zero_check, code); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK); x86_patch (end_jump, code); } x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE); x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); if (ins->dreg != X86_EAX) x86_pop_reg (code, X86_EAX); break; case OP_FBEQ: if (cfg->opt & MONO_OPT_FCMOV) { guchar *jump = code; x86_branch8 (code, X86_CC_P, 0, TRUE); EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); x86_patch (jump, code); break; } x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000); EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE); break; case OP_FBNE_UN: /* Branch if C013 != 100 */ if (cfg->opt & MONO_OPT_FCMOV) { /* branch if !ZF or (PF|CF) */ EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); EMIT_COND_BRANCH (ins, X86_CC_B, FALSE); break; } x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3); EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); break; case OP_FBLT: if (cfg->opt & MONO_OPT_FCMOV) { EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE); break; } EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); break; case OP_FBLT_UN: if (cfg->opt & MONO_OPT_FCMOV) { EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE); break; } if (ins->opcode == OP_FBLT_UN) { guchar *is_not_zero_check, *end_jump; is_not_zero_check = code; x86_branch8 (code, X86_CC_NZ, 0, TRUE); end_jump = code; x86_jump8 (code, 0); x86_patch (is_not_zero_check, code); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK); x86_patch (end_jump, code); } EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); break; case OP_FBGT: case OP_FBGT_UN: if (cfg->opt & MONO_OPT_FCMOV) { EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE); break; } x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0); if (ins->opcode == OP_FBGT_UN) { guchar *is_not_zero_check, *end_jump; is_not_zero_check = 
code; x86_branch8 (code, X86_CC_NZ, 0, TRUE); end_jump = code; x86_jump8 (code, 0); x86_patch (is_not_zero_check, code); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK); x86_patch (end_jump, code); } EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); break; case OP_FBGE: /* Branch if C013 == 100 or 001 */ if (cfg->opt & MONO_OPT_FCMOV) { guchar *br1; /* skip branch if C1=1 */ br1 = code; x86_branch8 (code, X86_CC_P, 0, FALSE); /* branch if (C0 | C3) = 1 */ EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE); x86_patch (br1, code); break; } x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0); EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3); EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); break; case OP_FBGE_UN: /* Branch if C013 == 000 */ if (cfg->opt & MONO_OPT_FCMOV) { EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE); break; } EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); break; case OP_FBLE: /* Branch if C013=000 or 100 */ if (cfg->opt & MONO_OPT_FCMOV) { guchar *br1; /* skip branch if C1=1 */ br1 = code; x86_branch8 (code, X86_CC_P, 0, FALSE); /* branch if C0=0 */ EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE); x86_patch (br1, code); break; } x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1)); x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0); EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE); break; case OP_FBLE_UN: /* Branch if C013 != 001 */ if (cfg->opt & MONO_OPT_FCMOV) { EMIT_COND_BRANCH (ins, X86_CC_P, FALSE); EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE); break; } x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0); EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE); break; case CEE_CKFINITE: { x86_push_reg (code, X86_EAX); x86_fxam (code); x86_fnstsw (code); x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100); x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0); x86_pop_reg (code, X86_EAX); EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException"); break; } case OP_TLS_GET: { code = emit_tls_get (code, ins->dreg, ins->inst_offset); break; } case OP_MEMORY_BARRIER: { /* Not needed on x86 */ break; } case OP_ATOMIC_ADD_I4: { int dreg = ins->dreg; if (dreg == ins->inst_basereg) { x86_push_reg (code, ins->sreg2); dreg = ins->sreg2; } if (dreg != ins->sreg2) x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4); x86_prefix (code, X86_LOCK_PREFIX); x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4); if (dreg != ins->dreg) { x86_mov_reg_reg (code, ins->dreg, dreg, 4); x86_pop_reg (code, dreg); } break; } case OP_ATOMIC_ADD_NEW_I4: { int dreg = ins->dreg; /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */ if (ins->sreg2 == dreg) { if (dreg == X86_EBX) { dreg = X86_EDI; if (ins->inst_basereg == X86_EDI) dreg = X86_ESI; } else { dreg = X86_EBX; if (ins->inst_basereg == X86_EBX) dreg = X86_EDI; } } else if (ins->inst_basereg == dreg) { if (dreg == X86_EBX) { dreg = X86_EDI; if (ins->sreg2 == X86_EDI) dreg = X86_ESI; } else { dreg = X86_EBX; if (ins->sreg2 == X86_EBX) dreg = X86_EDI; } } if (dreg != ins->dreg) { x86_push_reg (code, dreg); } x86_mov_reg_reg (code, dreg, ins->sreg2, 4); x86_prefix (code, X86_LOCK_PREFIX); x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4); /* dreg contains the old value, add with sreg2 value */ x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2); if (ins->dreg != dreg) { x86_mov_reg_reg (code, ins->dreg, dreg, 4); x86_pop_reg (code, dreg); } break; } case OP_ATOMIC_EXCHANGE_I4: { guchar *br[2]; int sreg2 = ins->sreg2; int breg = ins->inst_basereg; /* cmpxchg uses eax as comperand, need to make sure we can 
use it * hack to overcome limits in x86 reg allocator * (req: dreg == eax and sreg2 != eax and breg != eax) */ if (ins->dreg != X86_EAX) x86_push_reg (code, X86_EAX); /* We need the EAX reg for the cmpxchg */ if (ins->sreg2 == X86_EAX) { x86_push_reg (code, X86_EDX); x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4); sreg2 = X86_EDX; } if (breg == X86_EAX) { x86_push_reg (code, X86_ESI); x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4); breg = X86_ESI; } x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4); br [0] = code; x86_prefix (code, X86_LOCK_PREFIX); x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2); br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE); x86_patch (br [1], br [0]); if (breg != ins->inst_basereg) x86_pop_reg (code, X86_ESI); if (ins->dreg != X86_EAX) { x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4); x86_pop_reg (code, X86_EAX); } if (ins->sreg2 != sreg2) x86_pop_reg (code, X86_EDX); break; } default: g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); g_assert_not_reached (); } if ((code - cfg->native_code - offset) > max_len) { g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)", mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset); g_assert_not_reached (); } cpos += max_len; last_ins = ins; last_offset = offset; ins = ins->next; } cfg->code_len = code - cfg->native_code; } void mono_arch_register_lowlevel_calls (void) { } void mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors) { MonoJumpInfo *patch_info; gboolean compile_aot = !run_cctors; for (patch_info = ji; patch_info; patch_info = patch_info->next) { unsigned char *ip = patch_info->ip.i + code; const unsigned char *target; target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors); if (compile_aot) { switch (patch_info->type) { case MONO_PATCH_INFO_BB: case MONO_PATCH_INFO_LABEL: break; default: /* No need to patch these */ continue; } } switch (patch_info->type) { case MONO_PATCH_INFO_IP: *((gconstpointer *)(ip)) = target; break; case MONO_PATCH_INFO_CLASS_INIT: { guint8 *code = ip; /* Might already been changed to a nop */ x86_call_code (code, 0); x86_patch (ip, target); break; } case MONO_PATCH_INFO_ABS: case MONO_PATCH_INFO_METHOD: case MONO_PATCH_INFO_METHOD_JUMP: case MONO_PATCH_INFO_INTERNAL_METHOD: case MONO_PATCH_INFO_BB: case MONO_PATCH_INFO_LABEL: x86_patch (ip, target); break; case MONO_PATCH_INFO_NONE: break; default: { guint32 offset = mono_arch_get_patch_offset (ip); *((gconstpointer *)(ip + offset)) = target; break; } } } } guint8 * mono_arch_emit_prolog (MonoCompile *cfg) { MonoMethod *method = cfg->method; MonoBasicBlock *bb; MonoMethodSignature *sig; MonoInst *inst; int alloc_size, pos, max_offset, i; guint8 *code; cfg->code_size = MAX (mono_method_get_header (method)->code_size * 4, 256); code = cfg->native_code = g_malloc (cfg->code_size); x86_push_reg (code, X86_EBP); x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4); alloc_size = cfg->stack_offset; pos = 0; if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) { /* Might need to attach the thread to the JIT */ if (lmf_tls_offset != -1) { guint8 *buf; code = emit_tls_get ( code, X86_EAX, lmf_tls_offset); x86_test_reg_reg (code, X86_EAX, X86_EAX); buf = code; x86_branch8 (code, X86_CC_NE, 0, 0); x86_push_imm (code, cfg->domain); code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach"); x86_alu_reg_imm (code, 
X86_ADD, X86_ESP, 4); x86_patch (buf, code); #ifdef PLATFORM_WIN32 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */ /* FIXME: Add a separate key for LMF to avoid this */ x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf)); #endif } else { g_assert (!cfg->compile_aot); x86_push_imm (code, cfg->domain); code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach"); x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4); } } if (method->save_lmf) { pos += sizeof (MonoLMF); /* save the current IP */ mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL); x86_push_imm_template (code); /* save all caller saved regs */ x86_push_reg (code, X86_EBP); x86_push_reg (code, X86_ESI); x86_push_reg (code, X86_EDI); x86_push_reg (code, X86_EBX); /* save method info */ x86_push_imm (code, method); /* get the address of lmf for the current thread */ /* * This is performance critical so we try to use some tricks to make * it fast. */ if (lmf_tls_offset != -1) { /* Load lmf quickly using the GS register */ code = emit_tls_get (code, X86_EAX, lmf_tls_offset); #ifdef PLATFORM_WIN32 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */ /* FIXME: Add a separate key for LMF to avoid this */ x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf)); #endif } else { code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr"); } /* push lmf */ x86_push_reg (code, X86_EAX); /* push *lmf (previous_lmf) */ x86_push_membase (code, X86_EAX, 0); /* *(lmf) = ESP */ x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4); } else { if (cfg->used_int_regs & (1 << X86_EBX)) { x86_push_reg (code, X86_EBX); pos += 4; } if (cfg->used_int_regs & (1 << X86_EDI)) { x86_push_reg (code, X86_EDI); pos += 4; } if (cfg->used_int_regs & (1 << X86_ESI)) { x86_push_reg (code, X86_ESI); pos += 4; } } alloc_size -= pos; #if __APPLE__ /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */ { int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */ if (tot & 4) { tot += 4; alloc_size += 4; } if (tot & 8) { alloc_size += 8; } } #endif if (alloc_size) { /* See mono_emit_stack_alloc */ #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK) guint32 remaining_size = alloc_size; while (remaining_size >= 0x1000) { x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000); x86_test_membase_reg (code, X86_ESP, 0, X86_ESP); remaining_size -= 0x1000; } if (remaining_size) x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size); #else x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size); #endif } #if __APPLE__ /* check the stack is aligned */ x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4); x86_alu_reg_imm (code, X86_AND, X86_EDX, 15); x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0); x86_branch_disp (code, X86_CC_EQ, 3, FALSE); x86_breakpoint (code); #endif /* compute max_offset in order to use short forward jumps */ max_offset = 0; if (cfg->opt & MONO_OPT_BRANCH) { for (bb = cfg->bb_entry; bb; bb = bb->next_bb) { MonoInst *ins = bb->code; bb->max_offset = max_offset; if (cfg->prof_options & MONO_PROFILE_COVERAGE) max_offset += 6; /* max alignment for loops */ if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb)) max_offset += LOOP_ALIGNMENT; while (ins) { if (ins->opcode == OP_LABEL) ins->inst_c1 = max_offset; max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN]; ins = ins->next; } } } if (mono_jit_trace_calls != NULL &&
mono_trace_eval (method)) code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE); /* load arguments allocated to register from the stack */ sig = mono_method_signature (method); pos = 0; for (i = 0; i < sig->param_count + sig->hasthis; ++i) { inst = cfg->varinfo [pos]; if (inst->opcode == OP_REGVAR) { x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4); if (cfg->verbose_level > 2) g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg)); } pos++; } cfg->code_len = code - cfg->native_code; return code; } void mono_arch_emit_epilog (MonoCompile *cfg) { MonoMethod *method = cfg->method; MonoMethodSignature *sig = mono_method_signature (method); int quad, pos; guint32 stack_to_pop; guint8 *code; int max_epilog_size = 16; CallInfo *cinfo; if (cfg->method->save_lmf) max_epilog_size += 128; if (mono_jit_trace_calls != NULL) max_epilog_size += 50; while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) { cfg->code_size *= 2; cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); mono_jit_stats.code_reallocs++; } code = cfg->native_code + cfg->code_len; if (mono_jit_trace_calls != NULL && mono_trace_eval (method)) code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE); /* the code restoring the registers must be kept in sync with CEE_JMP */ pos = 0; if (method->save_lmf) { gint32 prev_lmf_reg; gint32 lmf_offset = -sizeof (MonoLMF); /* Find a spare register */ switch (sig->ret->type) { case MONO_TYPE_I8: case MONO_TYPE_U8: prev_lmf_reg = X86_EDI; cfg->used_int_regs |= (1 << X86_EDI); break; default: prev_lmf_reg = X86_EDX; break; } /* reg = previous_lmf */ x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4); /* ecx = lmf */ x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4); /* *(lmf) = previous_lmf */ x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4); /* restore caller saved regs */ if (cfg->used_int_regs & (1 << X86_EBX)) { x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4); } if (cfg->used_int_regs & (1 << X86_EDI)) { x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4); } if (cfg->used_int_regs & (1 << X86_ESI)) { x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4); } /* EBP is restored by LEAVE */ } else { if (cfg->used_int_regs & (1 << X86_EBX)) { pos -= 4; } if (cfg->used_int_regs & (1 << X86_EDI)) { pos -= 4; } if (cfg->used_int_regs & (1 << X86_ESI)) { pos -= 4; } if (pos) x86_lea_membase (code, X86_ESP, X86_EBP, pos); if (cfg->used_int_regs & (1 << X86_ESI)) { x86_pop_reg (code, X86_ESI); } if (cfg->used_int_regs & (1 << X86_EDI)) { x86_pop_reg (code, X86_EDI); } if (cfg->used_int_regs & (1 << X86_EBX)) { x86_pop_reg (code, X86_EBX); } } /* Load returned vtypes into registers if needed */ cinfo = get_call_info (sig, FALSE); if (cinfo->ret.storage == ArgValuetypeInReg) { for (quad = 0; quad < 2; quad ++) { switch (cinfo->ret.pair_storage [quad]) { case ArgInIReg: x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4); break; case ArgOnFloatFpStack: x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE); break; case ArgOnDoubleFpStack: x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof 
(gpointer)), TRUE); break; case ArgNone: break; default: g_assert_not_reached (); } } } x86_leave (code); if (CALLCONV_IS_STDCALL (sig)) { MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1)); stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info); } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack)) stack_to_pop = 4; else stack_to_pop = 0; if (stack_to_pop) x86_ret_imm (code, stack_to_pop); else x86_ret (code); g_free (cinfo); cfg->code_len = code - cfg->native_code; g_assert (cfg->code_len < cfg->code_size); } void mono_arch_emit_exceptions (MonoCompile *cfg) { MonoJumpInfo *patch_info; int nthrows, i; guint8 *code; MonoClass *exc_classes [16]; guint8 *exc_throw_start [16], *exc_throw_end [16]; guint32 code_size; int exc_count = 0; /* Compute needed space */ for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) { if (patch_info->type == MONO_PATCH_INFO_EXC) exc_count++; } /* * make sure we have enough space for exceptions * 16 is the size of two push_imm instructions and a call */ if (cfg->compile_aot) code_size = exc_count * 32; else code_size = exc_count * 16; while (cfg->code_len + code_size > (cfg->code_size - 16)) { cfg->code_size *= 2; cfg->native_code = g_realloc (cfg->native_code, cfg->code_size); mono_jit_stats.code_reallocs++; } code = cfg->native_code + cfg->code_len; nthrows = 0; for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) { switch (patch_info->type) { case MONO_PATCH_INFO_EXC: { MonoClass *exc_class; guint8 *buf, *buf2; guint32 throw_ip; x86_patch (patch_info->ip.i + cfg->native_code, code); exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name); g_assert (exc_class); throw_ip = patch_info->ip.i; /* Find a throw sequence for the same exception class */ for (i = 0; i < nthrows; ++i) if (exc_classes [i] == exc_class) break; if (i < nthrows) { x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip); x86_jump_code (code, exc_throw_start [i]); patch_info->type = MONO_PATCH_INFO_NONE; } else { guint32 size; /* Compute size of code following the push */ size = 5 + 5; if ((code - cfg->native_code) - throw_ip < 126 - size) { /* Use the shorter form */ buf = buf2 = code; x86_push_imm (code, 0); } else { buf = code; x86_push_imm (code, 0xf0f0f0f0); buf2 = code; } if (nthrows < 16) { exc_classes [nthrows] = exc_class; exc_throw_start [nthrows] = code; } x86_push_imm (code, exc_class->type_token); patch_info->data.name = "mono_arch_throw_corlib_exception"; patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD; patch_info->ip.i = code - cfg->native_code; x86_call_code (code, 0); x86_push_imm (buf, (code - cfg->native_code) - throw_ip); while (buf < buf2) x86_nop (buf); if (nthrows < 16) { exc_throw_end [nthrows] = code; nthrows ++; } } break; } default: /* do nothing */ break; } } cfg->code_len = code - cfg->native_code; g_assert (cfg->code_len < cfg->code_size); } void mono_arch_flush_icache (guint8 *code, gint size) { /* not needed */ } void mono_arch_flush_register_windows (void) { } /* * Support for fast access to the thread-local lmf structure using the GS * segment register on NPTL + kernel 2.6.x. 
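* (Added editorial note: on Linux the offsets returned by mono_get_lmf_tls_offset ()
* and friends are presumably read with a single %gs-relative mov emitted by
* emit_tls_get (); on Windows the values are TLS indexes instead, and only the
* first 64 slots, which live directly in the TEB, can be accessed inline -- hence
* the >= 64 checks below.)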
*/ static gboolean tls_offset_inited = FALSE; void mono_arch_setup_jit_tls_data (MonoJitTlsData *tls) { if (!tls_offset_inited) { if (!getenv ("MONO_NO_TLS")) { #ifdef PLATFORM_WIN32 /* * We need to init this multiple times, since when we are first called, the key might not * be initialized yet. */ appdomain_tls_offset = mono_domain_get_tls_key (); lmf_tls_offset = mono_get_jit_tls_key (); thread_tls_offset = mono_thread_get_tls_key (); /* Only 64 tls entries can be accessed using inline code */ if (appdomain_tls_offset >= 64) appdomain_tls_offset = -1; if (lmf_tls_offset >= 64) lmf_tls_offset = -1; if (thread_tls_offset >= 64) thread_tls_offset = -1; #else #if MONO_XEN_OPT optimize_for_xen = access ("/proc/xen", F_OK) == 0; #endif tls_offset_inited = TRUE; appdomain_tls_offset = mono_domain_get_tls_offset (); lmf_tls_offset = mono_get_lmf_tls_offset (); thread_tls_offset = mono_thread_get_tls_offset (); #endif } } } void mono_arch_free_jit_tls_data (MonoJitTlsData *tls) { } void mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg) { MonoCallInst *call = (MonoCallInst*)inst; CallInfo *cinfo = get_call_info (inst->signature, FALSE); /* add the this argument */ if (this_reg != -1) { if (cinfo->args [0].storage == ArgInIReg) { MonoInst *this; MONO_INST_NEW (cfg, this, OP_MOVE); this->type = this_type; this->sreg1 = this_reg; this->dreg = mono_regstate_next_int (cfg->rs); mono_bblock_add_inst (cfg->cbb, this); mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE); } else { MonoInst *this; MONO_INST_NEW (cfg, this, OP_OUTARG); this->type = this_type; this->sreg1 = this_reg; mono_bblock_add_inst (cfg->cbb, this); } } if (vt_reg != -1) { MonoInst *vtarg; if (cinfo->ret.storage == ArgValuetypeInReg) { /* * The valuetype is in EAX:EDX after the call, needs to be copied to * the stack. Save the address here, so the call instruction can * access it. 
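* (Added editorial note: the OP_STORE_MEMBASE_REG created below writes the vtype
* address to [esp + inst->stack_usage] at the call site; after the call returns,
* emit_move_return_value () is presumably what copies EAX:EDX out to that address.)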
*/ MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG); vtarg->inst_destbasereg = X86_ESP; vtarg->inst_offset = inst->stack_usage; vtarg->sreg1 = vt_reg; mono_bblock_add_inst (cfg->cbb, vtarg); } else if (cinfo->ret.storage == ArgInIReg) { /* The return address is passed in a register */ MONO_INST_NEW (cfg, vtarg, OP_MOVE); vtarg->sreg1 = vt_reg; vtarg->dreg = mono_regstate_next_int (cfg->rs); mono_bblock_add_inst (cfg->cbb, vtarg); mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE); } else { MonoInst *vtarg; MONO_INST_NEW (cfg, vtarg, OP_OUTARG); vtarg->type = STACK_MP; vtarg->sreg1 = vt_reg; mono_bblock_add_inst (cfg->cbb, vtarg); } } g_free (cinfo); } MonoInst* mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { MonoInst *ins = NULL; if (cmethod->klass == mono_defaults.math_class) { if (strcmp (cmethod->name, "Sin") == 0) { MONO_INST_NEW (cfg, ins, OP_SIN); ins->inst_i0 = args [0]; } else if (strcmp (cmethod->name, "Cos") == 0) { MONO_INST_NEW (cfg, ins, OP_COS); ins->inst_i0 = args [0]; } else if (strcmp (cmethod->name, "Tan") == 0) { MONO_INST_NEW (cfg, ins, OP_TAN); ins->inst_i0 = args [0]; } else if (strcmp (cmethod->name, "Atan") == 0) { MONO_INST_NEW (cfg, ins, OP_ATAN); ins->inst_i0 = args [0]; } else if (strcmp (cmethod->name, "Sqrt") == 0) { MONO_INST_NEW (cfg, ins, OP_SQRT); ins->inst_i0 = args [0]; } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) { MONO_INST_NEW (cfg, ins, OP_ABS); ins->inst_i0 = args [0]; } #if 0 /* OP_FREM is not IEEE compatible */ else if (strcmp (cmethod->name, "IEEERemainder") == 0) { MONO_INST_NEW (cfg, ins, OP_FREM); ins->inst_i0 = args [0]; ins->inst_i1 = args [1]; } #endif } else if (cmethod->klass == mono_defaults.thread_class && strcmp (cmethod->name, "MemoryBarrier") == 0) { MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER); } else if(cmethod->klass->image == mono_defaults.corlib && (strcmp (cmethod->klass->name_space, "System.Threading") == 0) && (strcmp (cmethod->klass->name, "Interlocked") == 0)) { if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) { MonoInst *ins_iconst; MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4); MONO_INST_NEW (cfg, ins_iconst, OP_ICONST); ins_iconst->inst_c0 = 1; ins->inst_i0 = args [0]; ins->inst_i1 = ins_iconst; } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) { MonoInst *ins_iconst; MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4); MONO_INST_NEW (cfg, ins_iconst, OP_ICONST); ins_iconst->inst_c0 = -1; ins->inst_i0 = args [0]; ins->inst_i1 = ins_iconst; } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) { MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4); ins->inst_i0 = args [0]; ins->inst_i1 = args [1]; } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) { MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4); ins->inst_i0 = args [0]; ins->inst_i1 = args [1]; } } return ins; } gboolean mono_arch_print_tree (MonoInst *tree, int arity) { return 0; } MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg) { MonoInst* ins; if (appdomain_tls_offset == -1) return NULL; MONO_INST_NEW (cfg, ins, OP_TLS_GET); ins->inst_offset = appdomain_tls_offset; return ins; } MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg) { MonoInst* ins; if (thread_tls_offset == -1) return NULL; MONO_INST_NEW (cfg, ins, OP_TLS_GET); ins->inst_offset = thread_tls_offset; 
return ins; } guint32 mono_arch_get_patch_offset (guint8 *code) { if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2)) return 2; else if ((code [0] == 0xba)) return 1; else if ((code [0] == 0x68)) /* push IMM */ return 1; else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6)) /* push <OFFSET>(<REG>) */ return 2; else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2)) /* call *<OFFSET>(<REG>) */ return 2; else if ((code [0] == 0xdd) || (code [0] == 0xd9)) /* fldl <ADDR> */ return 2; else if ((code [0] == 0x58) && (code [1] == 0x05)) /* pop %eax; add <OFFSET>, %eax */ return 2; else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81)) /* pop <REG>; add <OFFSET>, <REG> */ return 3; else { g_assert_not_reached (); return -1; } } gpointer* mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs) { guint8 reg = 0; gint32 disp = 0; /* go to the start of the call instruction * * address_byte = (m << 6) | (o << 3) | reg * call opcode: 0xff address_byte displacement * 0xff m=1,o=2 imm8 * 0xff m=2,o=2 imm32 */ code -= 6; /* * A given byte sequence can match more than one case here, so we have to be * really careful about the ordering of the cases. Longer sequences * come first. */ if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) { /* * This is an interface call * 8b 80 0c e8 ff ff mov 0xffffe80c(%eax),%eax * ff 10 call *(%eax) */ reg = x86_modrm_rm (code [5]); disp = 0; } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) { reg = code [4] & 0x07; disp = (signed char)code [5]; } else { if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) { reg = code [1] & 0x07; disp = *((gint32*)(code + 2)); } else if ((code [1] == 0xe8)) { return NULL; } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) { /* * This is an interface call * 8b 40 30 mov 0x30(%eax),%eax * ff 10 call *(%eax) */ disp = 0; reg = code [5] & 0x07; } else return NULL; } return (gpointer*)(((gint32)(regs [reg])) + disp); } gpointer* mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs) { guint8 reg = 0; gint32 disp = 0; code -= 7; if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) { reg = x86_modrm_rm (code [1]); disp = code [4]; if (reg == X86_EAX) return NULL; else return (gpointer*)(((gint32)(regs [reg])) + disp); } return NULL; }
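/*
 * Editorial worked example (added, not part of the original file): for a call
 * site that executed ff 90 0c 00 00 00, i.e. call *0xc(%eax),
 * mono_arch_get_vcall_slot_addr () backs up 6 bytes from the return address,
 * matches the mod=2/reg=2 form handled above (reg = X86_EAX, disp = 0xc) and
 * returns (gpointer*)((gint32)regs [X86_EAX] + 0xc), the address of the vtable
 * slot that was called through.
 */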