/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *
 * (C) 2003 Ximian, Inc.
 */
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "mini-amd64.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif
#define SIGNAL_STACK_SIZE (64 * 1024)

#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11
/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

#define NOT_IMPLEMENTED g_assert_not_reached ()
static int
mono_spillvar_offset_float (MonoCompile *cfg, int spillvar);

const char*
mono_arch_regname (int reg) {
	switch (reg) {
	case AMD64_RAX: return "%rax";
	case AMD64_RBX: return "%rbx";
	case AMD64_RCX: return "%rcx";
	case AMD64_RDX: return "%rdx";
	case AMD64_RSP: return "%rsp";
	case AMD64_RBP: return "%rbp";
	case AMD64_RDI: return "%rdi";
	case AMD64_RSI: return "%rsi";
	case AMD64_R8: return "%r8";
	case AMD64_R9: return "%r9";
	case AMD64_R10: return "%r10";
	case AMD64_R11: return "%r11";
	case AMD64_R12: return "%r12";
	case AMD64_R13: return "%r13";
	case AMD64_R14: return "%r14";
	case AMD64_R15: return "%r15";
	}
	return "unknown";
}
void
amd64_patch (unsigned char* code, gpointer target)
{
	/* Skip the REX prefix, if present */
	if ((code [0] >= 0x40) && (code [0] <= 0x4f))
		code += 1;

	x86_patch (code, (unsigned char*)target);
}
#define DEBUG(a) if (cfg->verbose_level > 1) a

#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };
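/* These are the six integer argument registers of the System V AMD64 ABI,
 * in parameter order; integer arguments beyond the sixth go on the stack. */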
static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}
#define FLOAT_PARAM_REGS 8
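/* The SysV ABI passes the first eight floating point arguments in
 * %xmm0-%xmm7; later FP arguments go on the stack. */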
static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		/* A double register */
		ainfo->storage = ArgInSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
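	/*
	 * Illustration (not from the original source): under these rules a
	 * signature like int f (int a, double b, void *c) places 'a' in %rdi,
	 * 'b' in %xmm0 and 'c' in %rsi, while a seventh integer argument
	 * would land on the stack.
	 */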
	guint32 i, gr, fr, simpletype;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;
	if (((sig->ret->type == MONO_TYPE_VALUETYPE) && !sig->ret->data.klass->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
		add_general (&gr, &stack_size, &cinfo->ret);
	}

	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);
	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];

		if ((sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
			/* Prevent implicit arguments from being passed in registers */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		simpletype = sig->params [i]->type;
	enum_calc_size:
		switch (simpletype) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			if (sig->params [i]->data.klass->enumtype) {
				simpletype = sig->params [i]->data.klass->enum_basetype->type;
				goto enum_calc_size;
			}
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_TYPEDBYREF:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo);
			break;
		default:
			g_assert_not_reached ();
		}
	}
	/* return value */
	simpletype = sig->ret->type;
enum_retvalue:
	switch (simpletype) {
	case MONO_TYPE_BOOLEAN:
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_CHAR:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_STRING:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = AMD64_RAX;
		break;
	case MONO_TYPE_U8:
	case MONO_TYPE_I8:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = AMD64_RAX;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		cinfo->ret.storage = ArgInSSEReg;
		cinfo->ret.reg = AMD64_XMM0;
		break;
	case MONO_TYPE_VALUETYPE:
		if (sig->ret->data.klass->enumtype) {
			simpletype = sig->ret->data.klass->enum_basetype->type;
			goto enum_retvalue;
		}
		break;
	case MONO_TYPE_TYPEDBYREF:
		/* Same as a valuetype with size 24 */
		break;
	case MONO_TYPE_VOID:
		break;
	default:
		g_error ("Can't handle as return value 0x%x", sig->ret->type);
	}
	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;

	return cinfo;
}
/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	int offset = 8;

	if (MONO_TYPE_ISSTRUCT (csig->ret)) {
		frame_size += sizeof (gpointer);
		offset += sizeof (gpointer);
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += sizeof (gpointer);
	}

	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
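		/* round-up illustration: with align == 8 and frame_size == 12,
		 * pad = (8 - (12 & 7)) & 7 = 4, so frame_size becomes 16 */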
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	return frame_size;
}
static gboolean
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	/* run the cpuid instruction with the given function id */
	__asm__ __volatile__ ("cpuid"
		: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
		: "a" (id));
	return TRUE;
}
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
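	/* PRECC is the x87 precision-control field of the FPU control word;
	 * PREC_DOUBLE selects 53-bit (double) precision */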
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;

	/* Feature Flags function, flags returned in EDX. */
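	/* (CPUID leaf 1: EDX bit 15 indicates CMOV support; FCMOV additionally
	 * requires an on-chip x87 FPU, reported in EDX bit 0.) */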
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
	}

	return opts;
}
static gboolean
is_regsize_var (MonoType *t) {
	if (t->byref)
		return TRUE;
	switch (t->type) {
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
		return TRUE;
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_STRING:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
		return TRUE;
	case MONO_TYPE_VALUETYPE:
		if (t->data.klass->enumtype)
			return is_regsize_var (t->data.klass->enum_basetype);
		return FALSE;
	}
	return FALSE;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		if (ins->opcode == OP_ARG)
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign
		 * extend 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* We use the callee saved registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
	regs = g_list_prepend (regs, (gpointer)AMD64_R12);
	regs = g_list_prepend (regs, (gpointer)AMD64_R13);
	regs = g_list_prepend (regs, (gpointer)AMD64_R14);
	regs = g_list_prepend (regs, (gpointer)AMD64_R15);

	return regs;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	int i, offset, size, align, curinst;
	CallInfo *cinfo;

	header = ((MonoMethodNormal *)m->method)->header;

	sig = m->method->signature;

	cinfo = get_call_info (sig, FALSE);

	if (sig->ret->type != MONO_TYPE_VOID) {
		switch (cinfo->ret.storage) {
		case ArgInIReg:
		case ArgInSSEReg:
			m->ret->opcode = OP_REGVAR;
			m->ret->inst_c0 = cinfo->ret.reg;
			break;
		default:
			g_assert_not_reached ();
		}
		m->ret->dreg = m->ret->inst_c0;
	}
	/*
	 * We use the ABI calling conventions for managed code as well.
	 * FIXME: Exception: valuetypes are never passed or returned in registers.
	 */

	/* Locals are allocated backwards from %fp */
	m->frame_reg = AMD64_RBP;
	offset = 0;

	/*
	 * Reserve a stack slot for holding information used during exception
	 * handling.
	 */
	if (header->num_clauses)
		offset += sizeof (gpointer) * 2;

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
		m->arch.lmf_offset = offset;
	}

	curinst = m->locals_start;
	for (i = curinst; i < m->num_varinfo; ++i) {
		inst = m->varinfo [i];

		if (inst->opcode == OP_REGVAR) {
			//g_print ("allocating local %d to %s\n", i, mono_arch_regname (inst->dreg));
			continue;
		}

		/* inst->unused indicates native sized value types, this is used by the
		 * pinvoke wrappers when they call functions returning structures */
		if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
			size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
		else
			size = mono_type_stack_size (inst->inst_vtype, &align);

		/*
		 * variables are accessed as negative offsets from %fp, so increase
		 * the offset before assigning it to a variable
		 */
		offset += size;
		offset += align - 1;
		offset &= ~(align - 1);
		inst->opcode = OP_REGOFFSET;
		inst->inst_basereg = AMD64_RBP;
		inst->inst_offset = - offset;

		//g_print ("allocating local %d to [%s - %d]\n", i, mono_arch_regname (inst->inst_basereg), - inst->inst_offset);
	}
	if (sig->call_convention == MONO_CALL_VARARG) {
		m->sig_cookie = cinfo->sig_cookie.offset;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = m->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			ArgInfo *ainfo = &cinfo->args [i];
			gboolean inreg = TRUE;
			MonoType *arg_type;

			if (sig->hasthis && (i == 0))
				arg_type = &mono_defaults.object_class->byval_arg;
			else
				arg_type = sig->params [i - sig->hasthis];

			/* FIXME: Allocate volatile arguments to registers */
			if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
				inreg = FALSE;

			/*
			 * Under AMD64, all registers used to pass arguments to functions
			 * are volatile across calls.
			 * FIXME: Optimize this.
			 */
			if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInSSEReg))
				inreg = FALSE;

			if (MONO_TYPE_ISSTRUCT (arg_type))
				/* FIXME: this isn't needed */
				inreg = FALSE;

			switch (ainfo->storage) {
			case ArgInIReg:
			case ArgInSSEReg:
				if (inreg) {
					inst->opcode = OP_REGVAR;
					inst->dreg = ainfo->reg;
				}
				break;
			case ArgOnStack:
				inst->opcode = OP_REGOFFSET;
				inst->inst_basereg = AMD64_RBP;
				inst->inst_offset = ainfo->offset + ARGS_OFFSET;
				break;
			default:
				NOT_IMPLEMENTED;
			}

			if (!inreg && (ainfo->storage != ArgOnStack)) {
				inst->opcode = OP_REGOFFSET;
				inst->inst_basereg = AMD64_RBP;
				/* These arguments are saved to the stack in the prolog */
				offset += sizeof (gpointer);
				inst->inst_offset = - offset;
			}

			if (MONO_TYPE_ISSTRUCT (arg_type)) {
				/* Add a level of indirection */
				/*
				 * It would be easier to add OP_LDIND_I here, but ldind_i instructions
				 * are destructively modified in a lot of places in inssel.brg.
				 */
			}
		}
	}
	m->stack_offset = offset;

	/* Add a properly aligned dword for use by int<->float conversion opcodes */
	mono_spillvar_offset_float (m, 0);

	g_free (cinfo);
}
/* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

/*
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n, stack_size;
	CallInfo *cinfo;
	ArgInfo *ainfo;

	stack_size = 0;

	/* add the vararg cookie before the non-implicit args */
	if (call->signature->call_convention == MONO_CALL_VARARG) {
		MonoInst *sig_arg;
		/* FIXME: Add support for signature tokens to AOT */
		cfg->disable_aot = TRUE;
		MONO_INST_NEW (cfg, arg, OP_OUTARG);
		MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
		sig_arg->inst_p0 = call->signature;
		arg->inst_left = sig_arg;
		arg->type = STACK_PTR;
		/* prepend, so they get reversed */
		arg->next = call->out_args;
		call->out_args = arg;
		stack_size += sizeof (gpointer);
	}

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (sig, sig->pinvoke);

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
		stack_size += sizeof (gpointer);
	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
		} else {
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;

			switch (ainfo->storage) {
			case ArgInIReg:
				arg->opcode = OP_OUTARG_REG;
				arg->unused = ainfo->reg;
				call->used_iregs |= 1 << ainfo->reg;
				break;
			case ArgInSSEReg:
				arg->opcode = OP_AMD64_OUTARG_XMMREG;
				arg->unused = ainfo->reg;
				/* FIXME: set call->used_... */
				break;
			case ArgOnStack:
				arg->opcode = OP_OUTARG;
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	call->stack_usage = cinfo->stack_usage;
	cfg->param_area = MAX (cfg->param_area, call->stack_usage);
	cfg->flags |= MONO_CFG_HAS_CALLS;

	g_free (cinfo);

	return call;
}
/*
 * Allow tracing to work with this interface (with an optional argument)
 */

/*
 * This may be needed on some archs or for debugging support.
 */
void
mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
{
	/* no stack room needed now (may be needed for FASTCALL-trace support) */
	*stack = 0;
	/* split prolog-epilog requirements? */
	*code = 50; /* max bytes needed: check this number */
}
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	/* if some args are passed in registers, we need to save them here */
	amd64_push_reg (code, AMD64_RBP);
	mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
	amd64_push_imm (code, (guint64)(cfg->method));
	mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, (gpointer)func);
	amd64_call_code (code, 0);
	/* remove the pushed MonoMethod* argument */
	amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
	amd64_pop_reg (code, AMD64_RBP);

	return code;
}
enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};

void*
mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;
	int arg_size = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;
	int rtype = method->signature->ret->type;

handle_enum:
	switch (rtype) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
			save_mode = SAVE_EAX;
		else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		break;
	case MONO_TYPE_VALUETYPE:
		if (method->signature->ret->data.klass->enumtype) {
			rtype = method->signature->ret->data.klass->enum_basetype->type;
			goto handle_enum;
		}
		save_mode = SAVE_STRUCT;
		break;
	default:
		save_mode = SAVE_EAX;
		break;
	}
	switch (save_mode) {
	case SAVE_EAX_EDX:
		amd64_push_reg (code, AMD64_RDX);
		amd64_push_reg (code, AMD64_RAX);
		if (enable_arguments) {
			amd64_push_reg (code, AMD64_RDX);
			amd64_push_reg (code, AMD64_RAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		amd64_push_reg (code, AMD64_RAX);
		if (enable_arguments) {
			amd64_push_reg (code, AMD64_RAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
		amd64_fst_membase (code, AMD64_RSP, 0, TRUE, TRUE);
		if (enable_arguments) {
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
			amd64_fst_membase (code, AMD64_RSP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			amd64_push_membase (code, AMD64_RBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
	amd64_push_imm (code, (guint64)method);
	mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, (gpointer)func);
	amd64_call_code (code, 0);
	amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		amd64_pop_reg (code, AMD64_RAX);
		amd64_pop_reg (code, AMD64_RDX);
		break;
	case SAVE_EAX:
		amd64_pop_reg (code, AMD64_RAX);
		break;
	case SAVE_FP:
		amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	return code;
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}

/* emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		mono_add_patch_info (cfg, code - cfg->native_code, \
				     MONO_PATCH_INFO_EXC, exc_name); \
		x86_branch32 (code, cond, 0, signed); \
	} while (0);
#define EMIT_FPCOMPARE(code) do { \
	amd64_fcompp (code); \
	amd64_fnstsw (code); \
} while (0);

#define EMIT_CALL() do { \
	amd64_set_reg_template (code, GP_SCRATCH_REG); \
	amd64_call_reg (code, GP_SCRATCH_REG); \
} while (0);
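/* Calls go through GP_SCRATCH_REG (%r11): the 64 bit target address is
 * materialized in the scratch register first (see the register usage notes
 * above), since a direct call instruction only encodes a 32 bit displacement. */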
/* FIXME: Add more instructions */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM))
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't always do it */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM:
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_AMD64_TEST_NULL (reg)
			 */
			if (ins->inst_imm == 0 && ins->next &&
			    (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
			     ins->next->opcode == OP_CEQ)) {
				ins->opcode = OP_X86_TEST_NULL;
			}
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (ins->inst_imm == 0 && ins->next &&
				    (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
				     ins->next->opcode == OP_CEQ)) {
					ins->opcode = OP_X86_TEST_NULL;
				}
			}
			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/*
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 the second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

			/*
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg)
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			/*
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

static int
opcode_to_x86_cond (int opcode)
{
	switch (opcode) {
	/* ... one case per comparison opcode, returning the matching X86_CC_ constant ... */
	default:
		g_assert_not_reached ();
	}

	return -1;
}
/*
 * returns the offset used by spillvar. It allocates a new
 * spill variable if necessary.
 */
static int
mono_spillvar_offset (MonoCompile *cfg, int spillvar)
{
	MonoSpillInfo **si, *info;
	int i = 0;

	si = &cfg->spill_info;

	while (i <= spillvar) {
		if (!*si) {
			*si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
			info->next = NULL;
			cfg->stack_offset += sizeof (gpointer);
			info->offset = - cfg->stack_offset;
		}

		if (i == spillvar)
			return (*si)->offset;

		i++;
		si = &(*si)->next;
	}

	g_assert_not_reached ();
	return 0;
}
/*
 * returns the offset used by spillvar. It allocates a new
 * spill float variable if necessary.
 * (same as mono_spillvar_offset but for float)
 */
static int
mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
{
	MonoSpillInfo **si, *info;
	int i = 0;

	si = &cfg->spill_info_float;

	while (i <= spillvar) {
		if (!*si) {
			*si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
			info->next = NULL;
			cfg->stack_offset += sizeof (double);
			info->offset = - cfg->stack_offset;
		}

		if (i == spillvar)
			return (*si)->offset;

		i++;
		si = &(*si)->next;
	}

	g_assert_not_reached ();
	return 0;
}
/*
 * Creates a store for spilled floating point items
 */
static MonoInst*
create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
{
	MonoInst *store;
	MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
	store->sreg1 = reg;
	store->inst_destbasereg = AMD64_RBP;
	store->inst_offset = mono_spillvar_offset_float (cfg, spill);

	DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08lx(%%sp)) (from %d)\n", spill, (long)store->inst_offset, reg));
	return store;
}
/*
 * Creates a load for spilled floating point items
 */
static MonoInst*
create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
{
	MonoInst *load;
	MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
	load->dreg = reg;
	load->inst_basereg = AMD64_RBP;
	load->inst_offset = mono_spillvar_offset_float (cfg, spill);

	DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08lx(%%sp)) (from %d)\n", spill, (long)load->inst_offset, reg));
	return load;
}
#define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && AMD64_IS_CALLEE_REG ((r)))
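/* Note: only the first 8 registers are considered freeable here; the limit
 * looks inherited from the x86 allocator this code is based on. */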
typedef struct {
	int born_in;
	int killed_in;
	int last_use;
	int prev_use;
	int flags;		/* used to track fp spill/load */
} RegTrack;

static const char*const * ins_spec = amd64_desc;
static void
print_ins (int i, MonoInst *ins)
{
	const char *spec = ins_spec [ins->opcode];
	g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
	if (!spec)
		g_error ("Unknown opcode: %s\n", mono_inst_name (ins->opcode));
	if (spec [MONO_INST_DEST]) {
		if (ins->dreg >= MONO_MAX_IREGS)
			g_print (" R%d <-", ins->dreg);
		else
			g_print (" %s <-", mono_arch_regname (ins->dreg));
	}
	if (spec [MONO_INST_SRC1]) {
		if (ins->sreg1 >= MONO_MAX_IREGS)
			g_print (" R%d", ins->sreg1);
		else
			g_print (" %s", mono_arch_regname (ins->sreg1));
	}
	if (spec [MONO_INST_SRC2]) {
		if (ins->sreg2 >= MONO_MAX_IREGS)
			g_print (" R%d", ins->sreg2);
		else
			g_print (" %s", mono_arch_regname (ins->sreg2));
	}
	if (spec [MONO_INST_CLOB])
		g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
	g_print ("\n");
}
static void
print_regtrack (RegTrack *t, int num)
{
	int i;
	char buf [128];
	const char *r;

	for (i = 0; i < num; ++i) {
		if (!t [i].born_in)
			continue;
		if (i >= MONO_MAX_IREGS) {
			g_snprintf (buf, sizeof(buf), "R%d", i);
			r = buf;
		} else
			r = mono_arch_regname (i);
		g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
	}
}
typedef struct InstList InstList;

struct InstList {
	InstList *prev;
	InstList *next;
	MonoInst *data;
};

static inline InstList*
inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
{
	InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
	item->data = data;
	item->prev = NULL;
	item->next = list;
	if (list)
		list->prev = item;
	return item;
}
/*
 * Force the spilling of the variable in the symbolic register 'reg'.
 */
static int
get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
{
	MonoInst *load;
	int i, sel, spill;

	sel = cfg->rs->iassign [reg];
	/*i = cfg->rs->isymbolic [sel];
	g_assert (i == reg);*/
	i = reg;
	spill = ++cfg->spill_count;
	cfg->rs->iassign [i] = -spill - 1;
	mono_regstate_free_int (cfg->rs, sel);
	/* we need to create a spill var and insert a load to sel after the current instruction */
	MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
	load->dreg = sel;
	load->inst_basereg = AMD64_RBP;
	load->inst_offset = mono_spillvar_offset (cfg, spill);
	if (item->prev) {
		while (ins->next != item->prev->data)
			ins = ins->next;
	}
	load->next = ins->next;
	ins->next = load;
	DEBUG (g_print ("SPILLED LOAD (%d at 0x%08lx(%%ebp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
	i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
	g_assert (i == sel);

	return sel;
}
static int
get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
{
	MonoInst *load;
	int i, sel, spill;

	DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
	/* exclude the registers in the current instruction */
	if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
		if (ins->sreg1 >= MONO_MAX_IREGS)
			regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
		else
			regmask &= ~ (1 << ins->sreg1);
		DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
	}
	if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
		if (ins->sreg2 >= MONO_MAX_IREGS)
			regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
		else
			regmask &= ~ (1 << ins->sreg2);
		DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
	}
	if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
		regmask &= ~ (1 << ins->dreg);
		DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
	}

	DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
	g_assert (regmask); /* need at least a register we can free */
	sel = -1;
	/* we should track prev_use and spill the register that's farther */
	for (i = 0; i < MONO_MAX_IREGS; ++i) {
		if (regmask & (1 << i)) {
			sel = i;
			DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
			break;
		}
	}
	i = cfg->rs->isymbolic [sel];
	spill = ++cfg->spill_count;
	cfg->rs->iassign [i] = -spill - 1;
	mono_regstate_free_int (cfg->rs, sel);
	/* we need to create a spill var and insert a load to sel after the current instruction */
	MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
	load->dreg = sel;
	load->inst_basereg = AMD64_RBP;
	load->inst_offset = mono_spillvar_offset (cfg, spill);
	if (item->prev) {
		while (ins->next != item->prev->data)
			ins = ins->next;
	}
	load->next = ins->next;
	ins->next = load;
	DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08lx(%%ebp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
	i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
	g_assert (i == sel);

	return sel;
}
static MonoInst*
create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
{
	MonoInst *copy;
	MONO_INST_NEW (cfg, copy, OP_MOVE);
	copy->dreg = dest;
	copy->sreg1 = src;
	if (ins) {
		copy->next = ins->next;
		ins->next = copy;
	}
	DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
	return copy;
}
static MonoInst*
create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
{
	MonoInst *store;
	MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
	store->sreg1 = reg;
	store->inst_destbasereg = AMD64_RBP;
	store->inst_offset = mono_spillvar_offset (cfg, spill);
	if (ins) {
		store->next = ins->next;
		ins->next = store;
	}
	DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08lx(%%ebp)) R%d (from %s)\n", spill, (long)store->inst_offset, prev_reg, mono_arch_regname (reg)));
	return store;
}
static void
insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
{
	MonoInst *prev;

	if (item->next) {
		prev = item->next->data;

		while (prev->next != ins)
			prev = prev->next;
		to_insert->next = ins;
		prev->next = to_insert;
	} else {
		to_insert->next = ins;
	}

	/*
	 * needed otherwise in the next instruction we can add an ins to the
	 * end and that would get past this instruction.
	 */
	item->data = to_insert;
}
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
	int val = cfg->rs->iassign [sym_reg];
	if (val < 0) {
		int spill = 0;
		if (val < -1) {
			/* the register gets spilled after this inst */
			spill = -val -1;
		}
		val = mono_regstate_alloc_int (cfg->rs, allow_mask);
		if (val < 0)
			val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
		cfg->rs->iassign [sym_reg] = val;
		/* add option to store before the instruction for src registers */
		if (spill)
			create_spilled_store (cfg, spill, val, sym_reg, ins);
	}
	cfg->rs->isymbolic [val] = sym_reg;
	return val;
}
/* flags used in reginfo->flags */
enum {
	MONO_X86_FP_NEEDS_LOAD_SPILL	= 1 << 0,
	MONO_X86_FP_NEEDS_SPILL		= 1 << 1,
	MONO_X86_FP_NEEDS_LOAD		= 1 << 2,
	MONO_X86_REG_NOT_ECX		= 1 << 3,
	MONO_X86_REG_EAX		= 1 << 4,
	MONO_X86_REG_EDX		= 1 << 5,
	MONO_X86_REG_ECX		= 1 << 6
};
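/* The MONO_X86_* names (and the EAX/ECX/EDX terminology in the allocator
 * below) are carried over from the x86 register allocator this code was
 * ported from; on AMD64 they refer to %rax, %rcx and %rdx. */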
static int
mono_amd64_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
{
	int val;
	int test_mask = dest_mask;

	if (flags & MONO_X86_REG_EAX)
		test_mask &= (1 << AMD64_RAX);
	else if (flags & MONO_X86_REG_EDX)
		test_mask &= (1 << AMD64_RDX);
	else if (flags & MONO_X86_REG_ECX)
		test_mask &= (1 << AMD64_RCX);
	else if (flags & MONO_X86_REG_NOT_ECX)
		test_mask &= ~ (1 << AMD64_RCX);

	val = mono_regstate_alloc_int (cfg->rs, test_mask);
	if (val >= 0 && test_mask != dest_mask)
		DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));

	if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
		DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but excluding ECX\n", test_mask));
		val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << AMD64_RCX)));
	}

	if (val < 0)
		val = mono_regstate_alloc_int (cfg->rs, dest_mask);
	if (val < 0)
		val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);

	return val;
}
/*#include "cprop.c"*/

/*
 * Local register allocation.
 * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set etc.).
 * We also reverse the list of instructions (in the InstList list) because assigning
 * registers backwards allows for more tricks to be used.
 */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins;
	MonoRegState *rs = cfg->rs;
	int i, val, fpcount;
	RegTrack *reginfo, *reginfof;
	RegTrack *reginfo1, *reginfo2, *reginfod;
	InstList *tmp, *reversed = NULL;
	const char *spec;
	guint32 src1_mask, src2_mask, dest_mask;
	GList *fspill_list = NULL;
	int fspill = 0;

	if (!bb->code)
		return;
	rs->next_vireg = bb->max_ireg;
	rs->next_vfreg = bb->max_freg;
	mono_regstate_assign (rs);
	reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
	reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
	rs->ifree_mask = AMD64_CALLEE_REGS;

	ins = bb->code;

	/*if (cfg->opt & MONO_OPT_COPYPROP)
		local_copy_prop (cfg, ins);*/

	i = 1;
	fpcount = 0;
	DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
	/* forward pass on the instructions to collect register liveness info */
	while (ins) {
		spec = ins_spec [ins->opcode];

		DEBUG (print_ins (i, ins));

		if (spec [MONO_INST_SRC1]) {
			if (spec [MONO_INST_SRC1] == 'f') {
				GList *spill;
				reginfo1 = reginfof;

				spill = g_list_first (fspill_list);
				if (spill && fpcount < MONO_MAX_FREGS) {
					reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
					fspill_list = g_list_remove (fspill_list, spill->data);
				} else
					fpcount--;
			}
			else
				reginfo1 = reginfo;
			reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
			reginfo1 [ins->sreg1].last_use = i;
			if (spec [MONO_INST_SRC1] == 'L') {
				/* The virtual register is allocated sequentially */
				reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
				reginfo1 [ins->sreg1 + 1].last_use = i;
				if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
					reginfo1 [ins->sreg1 + 1].born_in = i;

				reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
				reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
			}
		} else {
			ins->sreg1 = -1;
		}
		if (spec [MONO_INST_SRC2]) {
			if (spec [MONO_INST_SRC2] == 'f') {
				GList *spill;
				reginfo2 = reginfof;
				spill = g_list_first (fspill_list);
				if (spill) {
					reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
					fspill_list = g_list_remove (fspill_list, spill->data);
					if (fpcount >= MONO_MAX_FREGS) {
						fspill++;
						fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
						reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
					}
				} else
					fpcount--;
			}
			else
				reginfo2 = reginfo;
			reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
			reginfo2 [ins->sreg2].last_use = i;
			if (spec [MONO_INST_SRC2] == 'L') {
				/* The virtual register is allocated sequentially */
				reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
				reginfo2 [ins->sreg2 + 1].last_use = i;
				if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
					reginfo2 [ins->sreg2 + 1].born_in = i;
			}
			if (spec [MONO_INST_CLOB] == 's') {
				reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
				reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
			}
		} else {
			ins->sreg2 = -1;
		}
		if (spec [MONO_INST_DEST]) {
			if (spec [MONO_INST_DEST] == 'f') {
				reginfod = reginfof;
				if (fpcount >= MONO_MAX_FREGS) {
					reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
					fspill++;
					fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
					fpcount--;
				}
				fpcount++;
			}
			else
				reginfod = reginfo;
			if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
				reginfod [ins->dreg].killed_in = i;
			reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
			reginfod [ins->dreg].last_use = i;
			if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
				reginfod [ins->dreg].born_in = i;
			if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
				/* The virtual register is allocated sequentially */
				reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
				reginfod [ins->dreg + 1].last_use = i;
				if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
					reginfod [ins->dreg + 1].born_in = i;

				reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
				reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
			}
		} else {
			ins->dreg = -1;
		}

		reversed = inst_list_prepend (cfg->mempool, reversed, ins);
		++i;
		ins = ins->next;
	}

	// todo: check if we have anything left on fp stack, in verify mode?
	fspill = 0;

	DEBUG (print_regtrack (reginfo, rs->next_vireg));
	DEBUG (print_regtrack (reginfof, rs->next_vfreg));
	tmp = reversed;
	while (tmp) {
		int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
		dest_mask = src1_mask = src2_mask = AMD64_CALLEE_REGS;
		--i;
		ins = tmp->data;
		spec = ins_spec [ins->opcode];
		prev_dreg = -1;
		clob_dreg = -1;
		DEBUG (g_print ("processing:"));
		DEBUG (print_ins (i, ins));
		if (spec [MONO_INST_CLOB] == 's') {
			if (rs->ifree_mask & (1 << AMD64_RCX)) {
				DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
				rs->iassign [ins->sreg2] = AMD64_RCX;
				rs->isymbolic [AMD64_RCX] = ins->sreg2;
				ins->sreg2 = AMD64_RCX;
				rs->ifree_mask &= ~ (1 << AMD64_RCX);
			} else {
				int need_ecx_spill = TRUE;
				/*
				 * we first check if src1/dreg is already assigned a register
				 * and then we force a spill of the var assigned to ECX.
				 */
				/* the destination register can't be ECX */
				dest_mask &= ~ (1 << AMD64_RCX);
				src1_mask &= ~ (1 << AMD64_RCX);
				val = rs->iassign [ins->dreg];
				/*
				 * the destination register is already assigned to ECX:
				 * we need to allocate another register for it and then
				 * copy from this to ECX.
				 */
				if (val == AMD64_RCX && ins->dreg != ins->sreg2) {
					int new_dest;
					new_dest = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
					g_assert (new_dest >= 0);
					DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));

					rs->isymbolic [new_dest] = ins->dreg;
					rs->iassign [ins->dreg] = new_dest;
					clob_dreg = ins->dreg;
					ins->dreg = new_dest;
					create_copy_ins (cfg, AMD64_RCX, new_dest, ins);
					need_ecx_spill = FALSE;
					/*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
					val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
					rs->iassign [ins->dreg] = val;
					rs->isymbolic [val] = prev_dreg;
					rs->ifree_mask &= ~ (1 << val);*/
				}
				val = rs->iassign [ins->sreg1];
				if (val == AMD64_RCX) {
					g_assert_not_reached ();
				} else if (val >= 0) {
					/*
					 * the first src reg was already assigned to a register,
					 * we need to copy it to the dest register because the
					 * shift instruction clobbers the first operand.
					 */
					MonoInst *copy = create_copy_ins (cfg, ins->dreg, val, NULL);
					DEBUG (g_print ("\tclob:s moved sreg1 from R%d to R%d\n", val, ins->dreg));
					insert_before_ins (ins, tmp, copy);
				}
				val = rs->iassign [ins->sreg2];
				if (val >= 0 && val != AMD64_RCX) {
					MonoInst *move = create_copy_ins (cfg, AMD64_RCX, val, NULL);
					DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
					move->next = ins;
					g_assert_not_reached ();
					/* FIXME: where is move connected to the instruction list? */
					//tmp->prev->data->next = move;
				}
				if (need_ecx_spill && !(rs->ifree_mask & (1 << AMD64_RCX))) {
					DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RCX]));
					get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RCX]);
					mono_regstate_free_int (rs, AMD64_RCX);
				}
				/* force-set sreg2 */
				rs->iassign [ins->sreg2] = AMD64_RCX;
				rs->isymbolic [AMD64_RCX] = ins->sreg2;
				ins->sreg2 = AMD64_RCX;
				rs->ifree_mask &= ~ (1 << AMD64_RCX);
			}
		} else if (spec [MONO_INST_CLOB] == 'd') { /* division */
			int dest_reg = AMD64_RAX;
			int clob_reg = AMD64_RDX;
			if (spec [MONO_INST_DEST] == 'd') {
				dest_reg = AMD64_RDX; /* remainder */
				clob_reg = AMD64_RAX;
			}
			val = rs->iassign [ins->dreg];
			if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
				DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
				get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
				mono_regstate_free_int (rs, dest_reg);
			}
			if (val < 0) {
				if (val < -1) {
					/* the register gets spilled after this inst */
					int spill = -val -1;
					dest_mask = 1 << clob_reg;
					prev_dreg = ins->dreg;
					val = mono_regstate_alloc_int (rs, dest_mask);
					if (val < 0)
						val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
					rs->iassign [ins->dreg] = val;
					if (spill)
						create_spilled_store (cfg, spill, val, prev_dreg, ins);
					DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
					rs->isymbolic [val] = prev_dreg;
					ins->dreg = val;
					if (val != dest_reg) { /* force a copy */
						create_copy_ins (cfg, val, dest_reg, ins);
					}
				} else {
					DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
					prev_dreg = ins->dreg;
					rs->iassign [ins->dreg] = dest_reg;
					rs->isymbolic [dest_reg] = ins->dreg;
					ins->dreg = dest_reg;
					rs->ifree_mask &= ~ (1 << dest_reg);
				}
			} else {
				//DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
				if (val != dest_reg) { /* force a copy */
					create_copy_ins (cfg, val, dest_reg, ins);
					if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
						DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
						get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
						mono_regstate_free_int (rs, dest_reg);
					}
				}
			}
			if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
				DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
				get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
				mono_regstate_free_int (rs, clob_reg);
			}
			src1_mask = 1 << AMD64_RAX;
			src2_mask = 1 << AMD64_RCX;
		}
		if (spec [MONO_INST_DEST] == 'l') {
			int hreg;
			val = rs->iassign [ins->dreg];
			/* check special case when dreg has been moved from ecx (clob shift) */
			if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
				hreg = clob_dreg + 1;
			else
				hreg = ins->dreg + 1;

			/* base prev_dreg on fixed hreg, handle clob case */
			val = hreg - 1;

			if (val != rs->isymbolic [AMD64_RAX] && !(rs->ifree_mask & (1 << AMD64_RAX))) {
				DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
				get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
				mono_regstate_free_int (rs, AMD64_RAX);
			}
			if (hreg != rs->isymbolic [AMD64_RDX] && !(rs->ifree_mask & (1 << AMD64_RDX))) {
				DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [AMD64_RDX]));
				get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RDX]);
				mono_regstate_free_int (rs, AMD64_RDX);
			}
		}
		if (spec [MONO_INST_DEST] == 'f') {
			if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
				GList *spill_node;
				MonoInst *store;
				spill_node = g_list_first (fspill_list);
				g_assert (spill_node);

				store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
				insert_before_ins (ins, tmp, store);
				fspill_list = g_list_remove (fspill_list, spill_node->data);
				fspill--;
			}
		} else if (spec [MONO_INST_DEST] == 'L') {
			int hreg;
			val = rs->iassign [ins->dreg];
			/* check special case when dreg has been moved from ecx (clob shift) */
			if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
				hreg = clob_dreg + 1;
			else
				hreg = ins->dreg + 1;

			/* base prev_dreg on fixed hreg, handle clob case */
			prev_dreg = hreg - 1;

			if (val < 0) {
				int spill = 0;
				if (val < -1) {
					/* the register gets spilled after this inst */
					spill = -val -1;
				}
				val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
				rs->iassign [ins->dreg] = val;
				if (spill)
					create_spilled_store (cfg, spill, val, prev_dreg, ins);
			}

			DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));

			rs->isymbolic [val] = hreg - 1;
			ins->dreg = val;

			val = rs->iassign [hreg];
			if (val < 0) {
				int spill = 0;
				if (val < -1) {
					/* the register gets spilled after this inst */
					spill = -val -1;
				}
				val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
				rs->iassign [hreg] = val;
				if (spill)
					create_spilled_store (cfg, spill, val, hreg, ins);
			}

			DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
			rs->isymbolic [val] = hreg;
			/* save reg allocating into unused */
			ins->unused = val;

			/* check if we can free our long reg */
			if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
				DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
				mono_regstate_free_int (rs, val);
			}
		}
		else if (ins->dreg >= MONO_MAX_IREGS) {
			int hreg;
			val = rs->iassign [ins->dreg];
			if (spec [MONO_INST_DEST] == 'l') {
				/* check special case when dreg has been moved from ecx (clob shift) */
				if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
					hreg = clob_dreg + 1;
				else
					hreg = ins->dreg + 1;

				/* base prev_dreg on fixed hreg, handle clob case */
				prev_dreg = hreg - 1;
			} else
				prev_dreg = ins->dreg;

			if (val < 0) {
				int spill = 0;
				if (val < -1) {
					/* the register gets spilled after this inst */
					spill = -val -1;
				}
				val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
				rs->iassign [ins->dreg] = val;
				if (spill)
					create_spilled_store (cfg, spill, val, prev_dreg, ins);
			}
			DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
			rs->isymbolic [val] = prev_dreg;
			ins->dreg = val;
			/* handle cases where lreg needs to be eax:edx */
			if (spec [MONO_INST_DEST] == 'l') {
				/* check special case when dreg has been moved from ecx (clob shift) */
				int hreg = prev_dreg + 1;
				val = rs->iassign [hreg];
				if (val < 0) {
					int spill = 0;
					if (val < -1) {
						/* the register gets spilled after this inst */
						spill = -val -1;
					}
					val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
					rs->iassign [hreg] = val;
					if (spill)
						create_spilled_store (cfg, spill, val, hreg, ins);
				}
				DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
				rs->isymbolic [val] = hreg;
				if (ins->dreg == AMD64_RAX) {
					if (val != AMD64_RDX)
						create_copy_ins (cfg, val, AMD64_RDX, ins);
				} else if (ins->dreg == AMD64_RDX) {
					if (val == AMD64_RAX) {
						/* swap */
						g_assert_not_reached ();
					} else {
						/* two forced copies */
						create_copy_ins (cfg, val, AMD64_RDX, ins);
						create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
					}
				} else {
					if (val == AMD64_RDX) {
						create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
					} else {
						/* two forced copies */
						create_copy_ins (cfg, val, AMD64_RDX, ins);
						create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
					}
				}
				if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
					DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
					mono_regstate_free_int (rs, val);
				}
			}
		} else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != AMD64_RAX && spec [MONO_INST_CLOB] != 'd') {
			/* this instruction only outputs to EAX, need to copy */
			create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
		} else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != AMD64_RDX && spec [MONO_INST_CLOB] != 'd') {
			create_copy_ins (cfg, ins->dreg, AMD64_RDX, ins);
		}

		if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
			DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
			mono_regstate_free_int (rs, ins->dreg);
		}
		/* put src1 in EAX if it needs to be */
		if (spec [MONO_INST_SRC1] == 'a') {
			if (!(rs->ifree_mask & (1 << AMD64_RAX))) {
				DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
				get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
				mono_regstate_free_int (rs, AMD64_RAX);
			}
			/* force-set sreg1 */
			rs->iassign [ins->sreg1] = AMD64_RAX;
			rs->isymbolic [AMD64_RAX] = ins->sreg1;
			ins->sreg1 = AMD64_RAX;
			rs->ifree_mask &= ~ (1 << AMD64_RAX);
		}

		if (spec [MONO_INST_SRC1] == 'f') {
			if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
				MonoInst *load;
				MonoInst *store = NULL;

				if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
					GList *spill_node;
					spill_node = g_list_first (fspill_list);
					g_assert (spill_node);

					store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);
					fspill_list = g_list_remove (fspill_list, spill_node->data);
				}

				fspill++;
				fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
				load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
				insert_before_ins (ins, tmp, load);
				if (store)
					insert_before_ins (load, tmp, store);
			}
		} else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
			/* force source to be same as dest */
			rs->iassign [ins->sreg1] = ins->dreg;
			rs->iassign [ins->sreg1 + 1] = ins->unused;

			DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
			DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));

			ins->sreg1 = ins->dreg;
			/*
			 * No need for saving the reg, we know that src1=dest in these cases
			 * ins->inst_c0 = ins->unused;
			 */

			/* make sure that we remove them from free mask */
			rs->ifree_mask &= ~ (1 << ins->dreg);
			rs->ifree_mask &= ~ (1 << ins->unused);
		}
		else if (ins->sreg1 >= MONO_MAX_IREGS) {
			val = rs->iassign [ins->sreg1];
			prev_sreg1 = ins->sreg1;
			if (val < 0) {
				int spill = 0;
				if (val < -1) {
					/* the register gets spilled after this inst */
					spill = -val -1;
				}
				if (0 && ins->opcode == OP_MOVE) {
					/*
					 * small optimization: the dest register is already allocated
					 * but the src one is not: we can simply assign the same register
					 * here and peephole will get rid of the instruction later.
					 * This optimization may interfere with the clobbering handling:
					 * it removes a mov operation that will be added again to handle clobbering.
					 * There are also some other issues that show up with make testjit.
					 */
					mono_regstate_alloc_int (rs, 1 << ins->dreg);
					val = rs->iassign [ins->sreg1] = ins->dreg;
					//g_assert (val >= 0);
					DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
				} else {
					//g_assert (val == -1); /* source cannot be spilled */
					val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
					rs->iassign [ins->sreg1] = val;
					DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
				}
				if (spill) {
					MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
					insert_before_ins (ins, tmp, store);
				}
			}
			rs->isymbolic [val] = prev_sreg1;
			ins->sreg1 = val;
		} else {
			prev_sreg1 = -1;
		}

		/* handle clobbering of sreg1 */
		if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
			MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
			DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
			if (ins->sreg2 == -1 || spec [MONO_INST_CLOB] == 's') {
				/* note: the copy is inserted before the current instruction! */
				insert_before_ins (ins, tmp, copy);
				/* we set sreg1 to dest as well */
				prev_sreg1 = ins->sreg1 = ins->dreg;
			} else {
				/* inserted after the operation */
				copy->next = ins->next;
				ins->next = copy;
			}
		}
		if (spec [MONO_INST_SRC2] == 'f') {
			if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
				MonoInst *load;
				MonoInst *store = NULL;

				if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
					GList *spill_node;

					spill_node = g_list_first (fspill_list);
					g_assert (spill_node);
					if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
						spill_node = g_list_next (spill_node);

					store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
					fspill_list = g_list_remove (fspill_list, spill_node->data);
				}

				fspill++;
				fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
				load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
				insert_before_ins (ins, tmp, load);
				if (store)
					insert_before_ins (load, tmp, store);
			}
		}
		else if (ins->sreg2 >= MONO_MAX_IREGS) {
			val = rs->iassign [ins->sreg2];
			prev_sreg2 = ins->sreg2;
			if (val < 0) {
				int spill = 0;
				if (val < -1) {
					/* the register gets spilled after this inst */
					spill = -val -1;
				}
				val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
				rs->iassign [ins->sreg2] = val;
				DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
				if (spill)
					create_spilled_store (cfg, spill, val, prev_sreg2, ins);
			}
			rs->isymbolic [val] = prev_sreg2;
			ins->sreg2 = val;
			if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != AMD64_RCX) {
				DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [AMD64_RCX]));
			}
		} else {
			prev_sreg2 = -1;
		}

		if (spec [MONO_INST_CLOB] == 'c') {
			int j, s;
			guint32 clob_mask = AMD64_CALLEE_REGS;
			for (j = 0; j < MONO_MAX_IREGS; ++j) {
				s = 1 << j;
				if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
					//g_warning ("register %s busy at call site\n", mono_arch_regname (j));
				}
			}
		}
		/*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
			DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
			mono_regstate_free_int (rs, ins->sreg1);
		}
		if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
			DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
			mono_regstate_free_int (rs, ins->sreg2);
		}*/

		//DEBUG (print_ins (i, ins));
		/* this may result from an insert_before call */
		if (!tmp->next)
			bb->code = tmp->data;
		tmp = tmp->next;
	}

	g_free (reginfo);
	g_free (reginfof);
	g_list_free (fspill_list);
}
2267 static unsigned char*
2268 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2270 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
2271 x86_fnstcw_membase(code, AMD64_RSP, 0);
2272 amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2);
2273 amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2274 amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2);
2275 amd64_fldcw_membase (code, AMD64_RSP, 2);
2276 amd64_push_reg (code, AMD64_RAX); // SP = SP - 8
2277 amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8);
2278 amd64_pop_reg (code, dreg);
2279 amd64_fldcw_membase (code, AMD64_RSP, 0);
2280 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2283 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
2285 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
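/*
 * mono_emit_stack_alloc:
 *
 *   Emit code to subtract tree->sreg1 bytes from %rsp. Under Windows the
 * allocation is probed one page at a time (see the comment below); when
 * MONO_INST_INIT is set, the freshly allocated area is also zeroed.
 */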
2289 static unsigned char*
2290 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2292 int sreg = tree->sreg1;
2293 #ifdef PLATFORM_WIN32
2298 * If the requested stack size is larger than one page,
2299 * perform a stack-touch operation.
2302 * Generate stack probe code.
2303 * Under Windows, it is necessary to allocate one page at a time,
2304 * "touching" the stack after each successful sub-allocation. This is
2305 * because of the way stack growth is implemented - there is a
2306 * guard page before the lowest stack page that is currently committed.
2307 * The stack normally grows sequentially, so the OS traps access to the
2308 * guard page and commits more pages when needed.
2310 amd64_test_reg_imm (code, sreg, ~0xFFF);
2311 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2313 br[2] = code; /* loop */
2314 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
2315 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
2316 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2317 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2318 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2319 amd64_patch (br[3], br[2]);
2320 amd64_test_reg_reg (code, sreg, sreg);
2321 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2322 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2324 br[1] = code; x86_jump8 (code, 0);
2326 amd64_patch (br[0], code);
2327 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2328 amd64_patch (br[1], code);
2329 amd64_patch (br[4], code);
2330 #else /* PLATFORM_WIN32 */
2331 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
2333 if (tree->flags & MONO_INST_INIT) {
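/* Zero the freshly allocated area with rep stos: %rcx holds the size in
 * 4-byte units, %rax the zero pattern and %rdi the start address.
 * %rax, %rcx and %rdi are saved around the sequence unless the
 * instruction already uses them. */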
2335 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
2336 amd64_push_reg (code, AMD64_RAX);
2339 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
2340 amd64_push_reg (code, AMD64_RCX);
2343 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
2344 amd64_push_reg (code, AMD64_RDI);
2348 amd64_shift_reg_imm (code, X86_SHR, sreg, 2);
2349 if (sreg != AMD64_RCX)
2350 amd64_mov_reg_reg (code, AMD64_RCX, sreg, 4);
2351 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2353 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
2355 amd64_prefix (code, X86_REP_PREFIX);
2358 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
2359 amd64_pop_reg (code, AMD64_RDI);
2360 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
2361 amd64_pop_reg (code, AMD64_RCX);
2362 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
2363 amd64_pop_reg (code, AMD64_RAX);
2369 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint32 *code)
2373 /* Move return value to the target register */
2374 /* FIXME: do this in the local reg allocator */
2375 switch (ins->opcode) {
2378 case OP_FCALL_MEMBASE:
2379 /* FIXME: optimize this */
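/* The ABI returns the value in %xmm0, but this backend still does float
 * arithmetic on the x87 stack, so bounce it through a spill slot. */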
2380 offset = mono_spillvar_offset_float (cfg, 0);
2381 amd64_movsd_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2382 amd64_fld_membase (code, AMD64_RBP, offset, TRUE);
2389 #define REAL_PRINT_REG(text,reg) \
2390 mono_assert (reg >= 0); \
2391 amd64_push_reg (code, AMD64_RAX); \
2392 amd64_push_reg (code, AMD64_RDX); \
2393 amd64_push_reg (code, AMD64_RCX); \
2394 amd64_push_reg (code, reg); \
2395 amd64_push_imm (code, reg); \
2396 amd64_push_imm (code, text " %d %p\n"); \
2397 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
2398 amd64_call_reg (code, AMD64_RAX); \
2399 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); \
2400 amd64_pop_reg (code, AMD64_RCX); \
2401 amd64_pop_reg (code, AMD64_RDX); \
2402 amd64_pop_reg (code, AMD64_RAX);
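/* Usage (debugging only): REAL_PRINT_REG ("sreg1", ins->sreg1) emits code
 * that printf()s the given label, the register number and its runtime
 * value. */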
2404 /* benchmark and set based on cpu */
2405 #define LOOP_ALIGNMENT 8
2406 #define bb_is_loop_start(bb) ((bb)->nesting && ((bb)->in_count == 1))
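/* Loop headers (nested blocks with a single incoming edge) are padded
 * below so that the backward-branch target is aligned. */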
2409 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2414 guint8 *code = cfg->native_code + cfg->code_len;
2415 MonoInst *last_ins = NULL;
2416 guint last_offset = 0;
2419 if (cfg->opt & MONO_OPT_PEEPHOLE)
2420 peephole_pass (cfg, bb);
2422 if (cfg->opt & MONO_OPT_LOOP) {
2423 int pad, align = LOOP_ALIGNMENT;
2424 /* set alignment depending on cpu */
2425 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2427 /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2428 amd64_padding (code, pad);
2429 cfg->code_len += pad;
2430 bb->native_offset = cfg->code_len;
2434 if (cfg->verbose_level > 2)
2435 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2437 cpos = bb->max_offset;
2439 if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2440 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2441 g_assert (!mono_compile_aot);
2444 cov->data [bb->dfn].cil_code = bb->cil_code;
2445 /* this is not thread safe, but good enough */
2446 amd64_inc_mem (code, (guint64)&cov->data [bb->dfn].count);
2449 offset = code - cfg->native_code;
2453 offset = code - cfg->native_code;
2455 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2457 if (offset > (cfg->code_size - max_len - 16)) {
2458 cfg->code_size *= 2;
2459 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2460 code = cfg->native_code + offset;
2461 mono_jit_stats.code_reallocs++;
2464 mono_debug_record_line_number (cfg, ins, offset);
2466 switch (ins->opcode) {
2468 amd64_mul_reg (code, ins->sreg2, TRUE);
2471 amd64_mul_reg (code, ins->sreg2, FALSE);
2473 case OP_X86_SETEQ_MEMBASE:
2474 amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2476 case OP_STOREI1_MEMBASE_IMM:
2477 g_assert (amd64_is_imm32 (ins->inst_imm));
2478 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2480 case OP_STOREI2_MEMBASE_IMM:
2481 g_assert (amd64_is_imm32 (ins->inst_imm));
2482 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2484 case OP_STOREI4_MEMBASE_IMM:
2485 g_assert (amd64_is_imm32 (ins->inst_imm));
2486 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2488 case OP_STOREI1_MEMBASE_REG:
2489 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2491 case OP_STOREI2_MEMBASE_REG:
2492 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2494 case OP_STORE_MEMBASE_REG:
2495 case OP_STOREI8_MEMBASE_REG:
2496 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2498 case OP_STOREI4_MEMBASE_REG:
2499 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2501 case OP_STORE_MEMBASE_IMM:
2502 case OP_STOREI8_MEMBASE_IMM:
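/* x86-64 has no mov [mem], imm64 encoding: immediates that do not fit in
 * a sign-extended 32 bits go through the scratch register instead. */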
2503 if (amd64_is_imm32 (ins->inst_imm))
2504 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2506 amd64_mov_reg_imm (code, GP_SCRATCH_REG, ins->inst_imm);
2507 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, GP_SCRATCH_REG, 8);
2511 amd64_mov_reg_mem (code, ins->dreg, ins->inst_p0, sizeof (gpointer));
2514 amd64_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2517 amd64_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2520 amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2521 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2523 case OP_LOAD_MEMBASE:
2524 case OP_LOADI8_MEMBASE:
2525 if (amd64_is_imm32 (ins->inst_offset)) {
2526 amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2529 amd64_mov_reg_imm_size (code, GP_SCRATCH_REG, ins->inst_offset, 8);
2530 amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, GP_SCRATCH_REG, 0, 8);
2533 case OP_LOADI4_MEMBASE:
2534 amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2536 case OP_LOADU4_MEMBASE:
2537 amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2539 case OP_LOADU1_MEMBASE:
2540 amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2542 case OP_LOADI1_MEMBASE:
2543 amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2545 case OP_LOADU2_MEMBASE:
2546 amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2548 case OP_LOADI2_MEMBASE:
2549 amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2552 amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2555 amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2558 amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2561 amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2564 /* Clean out the upper 32 bits (a 32-bit mov zero-extends) */
2565 amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2568 amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2572 amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2574 case OP_COMPARE_IMM:
2575 g_assert (amd64_is_imm32 (ins->inst_imm));
2576 amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2578 case OP_X86_COMPARE_MEMBASE_REG:
2579 amd64_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2581 case OP_X86_COMPARE_MEMBASE_IMM:
2582 g_assert (amd64_is_imm32 (ins->inst_imm));
2583 amd64_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2585 case OP_X86_COMPARE_REG_MEMBASE:
2586 amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2588 case OP_X86_TEST_NULL:
2589 amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
2591 case OP_X86_ADD_MEMBASE_IMM:
2592 amd64_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2594 case OP_X86_ADD_MEMBASE:
2595 amd64_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2597 case OP_X86_SUB_MEMBASE_IMM:
2598 g_assert (amd64_is_imm32 (ins->inst_imm));
2599 amd64_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2601 case OP_X86_SUB_MEMBASE:
2602 amd64_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2604 case OP_X86_INC_MEMBASE:
2605 amd64_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2607 case OP_X86_INC_REG:
2608 amd64_inc_reg (code, ins->dreg);
2610 case OP_X86_DEC_MEMBASE:
2611 amd64_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2613 case OP_X86_DEC_REG:
2614 amd64_dec_reg (code, ins->dreg);
2616 case OP_X86_MUL_MEMBASE:
2617 amd64_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2619 case OP_AMD64_ICOMPARE_MEMBASE_REG:
2620 amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2622 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
2623 amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2625 case OP_AMD64_ICOMPARE_REG_MEMBASE:
2626 amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2629 amd64_breakpoint (code);
2634 amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2637 amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2640 g_assert (amd64_is_imm32 (ins->inst_imm));
2641 amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2644 g_assert (amd64_is_imm32 (ins->inst_imm));
2645 amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2649 amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2652 amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2655 g_assert (amd64_is_imm32 (ins->inst_imm));
2656 amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2659 g_assert (amd64_is_imm32 (ins->inst_imm));
2660 amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2663 amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2666 g_assert (amd64_is_imm32 (ins->inst_imm));
2667 amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2671 amd64_div_reg (code, ins->sreg2, TRUE);
2674 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2675 amd64_div_reg (code, ins->sreg2, FALSE);
2678 g_assert (amd64_is_imm32 (ins->inst_imm));
2679 amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2681 amd64_div_reg (code, ins->sreg2, TRUE);
2685 amd64_div_reg (code, ins->sreg2, TRUE);
2688 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2689 amd64_div_reg (code, ins->sreg2, FALSE);
2692 g_assert (amd64_is_imm32 (ins->inst_imm));
2693 amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2695 amd64_div_reg (code, ins->sreg2, TRUE);
2698 amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2701 g_assert (amd64_is_imm32 (ins->inst_imm));
2702 amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2705 amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2708 g_assert (amd64_is_imm32 (ins->inst_imm));
2709 amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2713 g_assert (ins->sreg2 == AMD64_RCX);
2714 amd64_shift_reg (code, X86_SHL, ins->dreg);
2718 g_assert (ins->sreg2 == AMD64_RCX);
2719 amd64_shift_reg (code, X86_SAR, ins->dreg);
2722 g_assert (amd64_is_imm32 (ins->inst_imm));
2723 amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2726 g_assert (amd64_is_imm32 (ins->inst_imm));
2727 amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2730 g_assert (amd64_is_imm32 (ins->inst_imm));
2731 amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2733 case OP_LSHR_UN_IMM:
2734 g_assert (amd64_is_imm32 (ins->inst_imm));
2735 amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2738 g_assert (ins->sreg2 == AMD64_RCX);
2739 amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2742 g_assert (ins->sreg2 == AMD64_RCX);
2743 amd64_shift_reg (code, X86_SHR, ins->dreg);
2746 g_assert (amd64_is_imm32 (ins->inst_imm));
2747 amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2750 g_assert (amd64_is_imm32 (ins->inst_imm));
2751 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2756 amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
2759 amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
2762 amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
2765 amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
2769 amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
2772 amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
2775 amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
2778 amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
2781 amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
2784 amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
2787 amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
2790 amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
2793 amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
2796 amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
2799 amd64_neg_reg_size (code, ins->sreg1, 4);
2802 amd64_not_reg_size (code, ins->sreg1, 4);
2805 g_assert (ins->sreg2 == AMD64_RCX);
2806 amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
2809 g_assert (ins->sreg2 == AMD64_RCX);
2810 amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
2813 amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2815 case OP_ISHR_UN_IMM:
2816 amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2819 g_assert (ins->sreg2 == AMD64_RCX);
2820 amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2823 amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2826 amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2829 amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, 4);
2832 amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2833 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2835 case OP_IMUL_OVF_UN: {
2836 /* the mul operation and the exception check should most likely be split */
2837 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2838 /*g_assert (ins->sreg2 == X86_EAX);
2839 g_assert (ins->dreg == X86_EAX);*/
2840 if (ins->sreg2 == X86_EAX) {
2841 non_eax_reg = ins->sreg1;
2842 } else if (ins->sreg1 == X86_EAX) {
2843 non_eax_reg = ins->sreg2;
2845 /* no need to save since we're going to store to it anyway */
2846 if (ins->dreg != X86_EAX) {
2848 amd64_push_reg (code, X86_EAX);
2850 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2851 non_eax_reg = ins->sreg2;
2853 if (ins->dreg == X86_EDX) {
2856 amd64_push_reg (code, X86_EAX);
2858 } else if (ins->dreg != X86_EAX) {
2860 amd64_push_reg (code, X86_EDX);
2862 amd64_mul_reg_size (code, non_eax_reg, FALSE, 4);
2863 /* save before the check since pop and mov don't change the flags */
2864 if (ins->dreg != X86_EAX)
2865 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2867 amd64_pop_reg (code, X86_EDX);
2869 amd64_pop_reg (code, X86_EAX);
2870 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2874 amd64_cdq_size (code, 4);
2875 amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
2878 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2879 amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
2882 amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2883 amd64_cdq_size (code, 4);
2884 amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
2887 amd64_cdq_size (code, 4);
2888 amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
2891 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2892 amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
2895 amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2896 amd64_cdq_size (code, 4);
2897 amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
2901 amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
2903 case OP_ICOMPARE_IMM:
2904 amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
2912 EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), TRUE);
2919 EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), FALSE);
2922 amd64_not_reg (code, ins->sreg1);
2925 amd64_neg_reg (code, ins->sreg1);
2928 amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2931 amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2935 /* FIXME: optimize this */
2936 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
2939 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2940 amd64_set_reg_template (code, ins->dreg);
2945 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
2947 case OP_AMD64_SET_XMMREG: {
2948 /* FIXME: optimize this */
2949 guint32 offset = mono_spillvar_offset_float (cfg, 0);
2950 amd64_fst_membase (code, AMD64_RBP, offset, TRUE, TRUE);
2951 /* ins->dreg is set to -1 by the reg allocator */
2952 amd64_movsd_reg_membase (code, ins->unused, AMD64_RBP, offset);
2956 g_assert_not_reached ();
2959 * Note: this 'frame destruction' logic is useful for tail calls, too.
2960 * Keep in sync with the code in emit_epilog.
2964 /* FIXME: no tracing support... */
2965 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2966 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2967 /* reset offset to make max_len work */
2968 offset = code - cfg->native_code;
2970 g_assert (!cfg->method->save_lmf);
2972 if (cfg->used_int_regs & (1 << AMD64_RBX))
2974 if (cfg->used_int_regs & (1 << AMD64_RDI))
2976 if (cfg->used_int_regs & (1 << AMD64_RSI))
2979 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
2981 if (cfg->used_int_regs & (1 << AMD64_RSI))
2982 amd64_pop_reg (code, AMD64_RSI);
2983 if (cfg->used_int_regs & (1 << AMD64_RDI))
2984 amd64_pop_reg (code, AMD64_RDI);
2985 if (cfg->used_int_regs & (1 << AMD64_RBX))
2986 amd64_pop_reg (code, AMD64_RBX);
2988 /* restore RSP/RBP */
2990 offset = code - cfg->native_code;
2991 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2992 x86_jump32 (code, 0);
2996 /* ensure ins->sreg1 is not NULL */
2997 amd64_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
3000 int hreg = ins->sreg1 == AMD64_RAX? AMD64_RCX: AMD64_RAX;
3001 amd64_push_reg (code, hreg);
3002 amd64_lea_membase (code, hreg, AMD64_RBP, cfg->sig_cookie);
3003 amd64_mov_membase_reg (code, ins->sreg1, 0, hreg, 8);
3004 amd64_pop_reg (code, hreg);
3012 call = (MonoCallInst*)ins;
3013 if (ins->flags & MONO_INST_HAS_METHOD)
3014 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
3016 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
3019 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3020 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3021 code = emit_move_return_value (cfg, ins, code);
3026 case OP_VOIDCALL_REG:
3028 call = (MonoCallInst*)ins;
3029 amd64_call_reg (code, ins->sreg1);
3030 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3031 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3032 code = emit_move_return_value (cfg, ins, code);
3034 case OP_FCALL_MEMBASE:
3035 case OP_LCALL_MEMBASE:
3036 case OP_VCALL_MEMBASE:
3037 case OP_VOIDCALL_MEMBASE:
3038 case OP_CALL_MEMBASE:
3039 call = (MonoCallInst*)ins;
3040 amd64_call_membase (code, ins->sreg1, ins->inst_offset);
3041 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3042 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3043 code = emit_move_return_value (cfg, ins, code);
3047 amd64_push_reg (code, ins->sreg1);
3049 case OP_X86_PUSH_IMM:
3050 g_assert (amd64_is_imm32 (ins->inst_imm));
3051 amd64_push_imm (code, ins->inst_imm);
3053 case OP_X86_PUSH_MEMBASE:
3054 amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
3056 case OP_X86_PUSH_OBJ:
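/* Push a valuetype onto the stack: reserve inst_imm bytes, then copy the
 * object into the reserved area with rep movs (%rsi = source, %rdi =
 * destination, %rcx = size in 4-byte units). */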
3057 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
3058 amd64_push_reg (code, AMD64_RDI);
3059 amd64_push_reg (code, AMD64_RSI);
3060 amd64_push_reg (code, AMD64_RCX);
3061 if (ins->inst_offset)
3062 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
3064 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
3065 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 24);
3066 amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 2));
3068 amd64_prefix (code, X86_REP_PREFIX);
3070 amd64_pop_reg (code, AMD64_RCX);
3071 amd64_pop_reg (code, AMD64_RSI);
3072 amd64_pop_reg (code, AMD64_RDI);
3075 amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3077 case OP_X86_LEA_MEMBASE:
3078 amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3081 amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3084 /* keep alignment */
3085 amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3086 amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3087 code = mono_emit_stack_alloc (code, ins);
3088 amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3094 amd64_push_reg (code, ins->sreg1);
3095 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
3096 (gpointer)"mono_arch_throw_exception");
3100 case OP_CALL_HANDLER:
3101 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3102 amd64_call_imm (code, 0);
3105 ins->inst_c0 = code - cfg->native_code;
3108 //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3109 //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3111 if (ins->flags & MONO_INST_BRLABEL) {
3112 if (ins->inst_i0->inst_c0) {
3113 amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3115 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3116 if ((cfg->opt & MONO_OPT_BRANCH) &&
3117 x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3118 x86_jump8 (code, 0);
3120 x86_jump32 (code, 0);
3123 if (ins->inst_target_bb->native_offset) {
3124 amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
3126 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3127 if ((cfg->opt & MONO_OPT_BRANCH) &&
3128 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3129 x86_jump8 (code, 0);
3131 x86_jump32 (code, 0);
3136 amd64_jump_reg (code, ins->sreg1);
3140 amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3141 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3145 amd64_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3146 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3150 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3151 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3155 amd64_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3156 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3160 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3161 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3163 case OP_COND_EXC_EQ:
3164 case OP_COND_EXC_NE_UN:
3165 case OP_COND_EXC_LT:
3166 case OP_COND_EXC_LT_UN:
3167 case OP_COND_EXC_GT:
3168 case OP_COND_EXC_GT_UN:
3169 case OP_COND_EXC_GE:
3170 case OP_COND_EXC_GE_UN:
3171 case OP_COND_EXC_LE:
3172 case OP_COND_EXC_LE_UN:
3173 case OP_COND_EXC_OV:
3174 case OP_COND_EXC_NO:
3176 case OP_COND_EXC_NC:
3177 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ],
3178 (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3190 EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3193 /* floating point opcodes */
3195 double d = *(double *)ins->inst_p0;
3197 if ((d == 0.0) && (mono_signbit (d) == 0)) {
3199 } else if (d == 1.0) {
3202 /* FIXME: Use RIP relative addressing */
3203 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
3204 amd64_set_reg_template (code, GP_SCRATCH_REG);
3205 amd64_fld_membase (code, GP_SCRATCH_REG, 0, TRUE);
3210 float f = *(float *)ins->inst_p0;
3212 if ((f == 0.0) && (mono_signbit (f) == 0)) {
3214 } else if (f == 1.0) {
3217 /* FIXME: Use RIP relative addressing */
3218 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
3219 amd64_set_reg_template (code, GP_SCRATCH_REG);
3220 amd64_fld_membase (code, GP_SCRATCH_REG, 0, FALSE);
3224 case OP_STORER8_MEMBASE_REG:
3225 amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3227 case OP_LOADR8_SPILL_MEMBASE:
3228 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3229 amd64_fxch (code, 1);
3231 case OP_LOADR8_MEMBASE:
3232 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3234 case OP_STORER4_MEMBASE_REG:
3235 amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3237 case OP_LOADR4_MEMBASE:
3238 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3240 case CEE_CONV_R4: /* FIXME: change precision */
3242 amd64_push_reg (code, ins->sreg1);
3243 amd64_fild_membase (code, AMD64_RSP, 0, FALSE);
3244 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3246 case OP_LCONV_TO_R4: /* FIXME: change precision */
3247 case OP_LCONV_TO_R8:
3248 amd64_push_reg (code, ins->sreg1);
3249 amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3250 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3252 case OP_X86_FP_LOAD_I8:
3253 amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3255 case OP_X86_FP_LOAD_I4:
3256 amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3258 case OP_FCONV_TO_I1:
3259 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3261 case OP_FCONV_TO_U1:
3262 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3264 case OP_FCONV_TO_I2:
3265 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3267 case OP_FCONV_TO_U2:
3268 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3270 case OP_FCONV_TO_I4:
3272 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3274 case OP_FCONV_TO_I8:
3275 code = emit_float_to_int (cfg, code, ins->dreg, 8, TRUE);
3277 case OP_LCONV_TO_R_UN: {
3278 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
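/* mn encodes 2^64 as an 80-bit extended-precision constant (biased
 * exponent 0x403f, significand 1.0); adding it compensates for fild
 * having interpreted the 64-bit input as signed. */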
3281 /* load the 64-bit integer onto the FP stack */
3282 amd64_push_imm (code, 0);
3283 amd64_push_reg (code, ins->sreg2);
3284 amd64_push_reg (code, ins->sreg1);
3285 amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3286 /* store as an 80-bit FP value */
3287 x86_fst80_membase (code, AMD64_RSP, 0);
3289 /* test if lreg is negative */
3290 amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3291 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3293 /* add correction constant mn */
3294 x86_fld80_mem (code, mn);
3295 x86_fld80_membase (code, AMD64_RSP, 0);
3296 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3297 x86_fst80_membase (code, AMD64_RSP, 0);
3299 amd64_patch (br, code);
3301 x86_fld80_membase (code, AMD64_RSP, 0);
3302 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 24);
3306 case OP_LCONV_TO_OVF_I: {
3307 guint8 *br [3], *label [1];
3310 * Valid ints: 0xffffffff:80000000 to 00000000:0x7fffffff
3312 amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3314 /* If the low word top bit is set, see if we are negative */
3315 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3316 /* We are not negative (no top bit set); check that the top word is zero */
3317 amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3318 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3321 /* throw exception */
3322 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3323 x86_jump32 (code, 0);
3325 amd64_patch (br [0], code);
3326 /* our top bit is set, check that the top word is 0xffffffff */
3327 amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3329 amd64_patch (br [1], code);
3330 /* nope, emit exception */
3331 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3332 amd64_patch (br [2], label [0]);
3334 if (ins->dreg != ins->sreg1)
3335 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3338 case CEE_CONV_OVF_U4:
3340 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3343 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3346 amd64_fp_op_reg (code, X86_FSUB, 1, TRUE);
3349 amd64_fp_op_reg (code, X86_FMUL, 1, TRUE);
3352 amd64_fp_op_reg (code, X86_FDIV, 1, TRUE);
3360 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3365 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3372 * it really doesn't make sense to inline all this code,
3373 * it's here just to show that things may not be as simple as they appear.
3376 guchar *check_pos, *end_tan, *pop_jump;
3377 amd64_push_reg (code, AMD64_RAX);
3379 amd64_fnstsw (code);
3380 amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3382 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3383 amd64_fstp (code, 0); /* pop the 1.0 */
3385 x86_jump8 (code, 0);
3387 amd64_fp_op (code, X86_FADD, 0);
3388 amd64_fxch (code, 1);
3391 amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3393 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3394 amd64_fstp (code, 1);
3396 amd64_patch (pop_jump, code);
3397 amd64_fstp (code, 0); /* pop the 1.0 */
3398 amd64_patch (check_pos, code);
3399 amd64_patch (end_tan, code);
3401 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3402 amd64_pop_reg (code, AMD64_RAX);
3407 amd64_fpatan (code);
3409 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3415 amd64_fstp (code, 0);
3420 amd64_push_reg (code, AMD64_RAX);
3421 /* we need to exchange ST(0) with ST(1) */
3422 amd64_fxch (code, 1);
3424 /* this requires a loop, because fprem sometimes
3425 * returns a partial remainder */
3427 /* looks like MS is using fprem instead of the IEEE-compatible fprem1 */
3428 /* x86_fprem1 (code); */
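/* fprem leaves C2 set in the status word while the reduction is still
 * partial, so loop until the flag reads back clear. */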
3430 amd64_fnstsw (code);
3431 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2);
3433 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3436 amd64_fstp (code, 1);
3438 amd64_pop_reg (code, AMD64_RAX);
3442 if (cfg->opt & MONO_OPT_FCMOV) {
3443 amd64_fcomip (code, 1);
3444 amd64_fstp (code, 0);
3447 /* this overwrites RAX */
3448 EMIT_FPCOMPARE(code);
3449 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3452 if (cfg->opt & MONO_OPT_FCMOV) {
3453 /* zeroing the register at the start results in
3454 * shorter and faster code (we can also remove the widening op)
3456 guchar *unordered_check;
3457 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3458 amd64_fcomip (code, 1);
3459 amd64_fstp (code, 0);
3460 unordered_check = code;
3461 x86_branch8 (code, X86_CC_P, 0, FALSE);
3462 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3463 amd64_patch (unordered_check, code);
3466 if (ins->dreg != AMD64_RAX)
3467 amd64_push_reg (code, AMD64_RAX);
3469 EMIT_FPCOMPARE(code);
3470 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3471 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3472 amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3473 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3475 if (ins->dreg != AMD64_RAX)
3476 amd64_pop_reg (code, AMD64_RAX);
3480 if (cfg->opt & MONO_OPT_FCMOV) {
3481 /* zeroing the register at the start results in
3482 * shorter and faster code (we can also remove the widening op)
3484 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3485 amd64_fcomip (code, 1);
3486 amd64_fstp (code, 0);
3487 if (ins->opcode == OP_FCLT_UN) {
3488 guchar *unordered_check = code;
3489 guchar *jump_to_end;
3490 x86_branch8 (code, X86_CC_P, 0, FALSE);
3491 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3493 x86_jump8 (code, 0);
3494 amd64_patch (unordered_check, code);
3495 amd64_inc_reg (code, ins->dreg);
3496 amd64_patch (jump_to_end, code);
3498 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3502 if (ins->dreg != AMD64_RAX)
3503 amd64_push_reg (code, AMD64_RAX);
3505 EMIT_FPCOMPARE(code);
3506 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3507 if (ins->opcode == OP_FCLT_UN) {
3508 guchar *is_not_zero_check, *end_jump;
3509 is_not_zero_check = code;
3510 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3512 x86_jump8 (code, 0);
3513 amd64_patch (is_not_zero_check, code);
3514 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3516 amd64_patch (end_jump, code);
3518 amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3519 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3521 if (ins->dreg != AMD64_RAX)
3522 amd64_pop_reg (code, AMD64_RAX);
3526 if (cfg->opt & MONO_OPT_FCMOV) {
3527 /* zeroing the register at the start results in
3528 * shorter and faster code (we can also remove the widening op)
3530 guchar *unordered_check;
3531 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3532 amd64_fcomip (code, 1);
3533 amd64_fstp (code, 0);
3534 if (ins->opcode == OP_FCGT) {
3535 unordered_check = code;
3536 x86_branch8 (code, X86_CC_P, 0, FALSE);
3537 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3538 amd64_patch (unordered_check, code);
3540 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3544 if (ins->dreg != AMD64_RAX)
3545 amd64_push_reg (code, AMD64_RAX);
3547 EMIT_FPCOMPARE(code);
3548 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3549 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3550 if (ins->opcode == OP_FCGT_UN) {
3551 guchar *is_not_zero_check, *end_jump;
3552 is_not_zero_check = code;
3553 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3555 x86_jump8 (code, 0);
3556 amd64_patch (is_not_zero_check, code);
3557 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3559 amd64_patch (end_jump, code);
3561 amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3562 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3564 if (ins->dreg != AMD64_RAX)
3565 amd64_pop_reg (code, AMD64_RAX);
3568 if (cfg->opt & MONO_OPT_FCMOV) {
3569 guchar *jump = code;
3570 x86_branch8 (code, X86_CC_P, 0, TRUE);
3571 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3572 amd64_patch (jump, code);
3575 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3576 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3579 /* Branch if C013 != 100 */
3580 if (cfg->opt & MONO_OPT_FCMOV) {
3581 /* branch if !ZF or (PF|CF) */
3582 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3583 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3584 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3587 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
3588 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3591 if (cfg->opt & MONO_OPT_FCMOV) {
3592 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3595 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3598 if (cfg->opt & MONO_OPT_FCMOV) {
3599 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3600 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3603 if (ins->opcode == OP_FBLT_UN) {
3604 guchar *is_not_zero_check, *end_jump;
3605 is_not_zero_check = code;
3606 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3608 x86_jump8 (code, 0);
3609 amd64_patch (is_not_zero_check, code);
3610 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3612 amd64_patch (end_jump, code);
3614 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3618 if (cfg->opt & MONO_OPT_FCMOV) {
3619 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3622 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3623 if (ins->opcode == OP_FBGT_UN) {
3624 guchar *is_not_zero_check, *end_jump;
3625 is_not_zero_check = code;
3626 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3628 x86_jump8 (code, 0);
3629 amd64_patch (is_not_zero_check, code);
3630 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3632 amd64_patch (end_jump, code);
3634 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3637 /* Branch if C013 == 100 or 001 */
3638 if (cfg->opt & MONO_OPT_FCMOV) {
3641 /* skip branch if C1=1 */
3643 x86_branch8 (code, X86_CC_P, 0, FALSE);
3644 /* branch if (C0 | C3) = 1 */
3645 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3646 amd64_patch (br1, code);
3649 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3650 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3651 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
3652 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3655 /* Branch if C013 == 000 */
3656 if (cfg->opt & MONO_OPT_FCMOV) {
3657 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3660 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3663 /* Branch if C013=000 or 100 */
3664 if (cfg->opt & MONO_OPT_FCMOV) {
3667 /* skip branch if C1=1 */
3669 x86_branch8 (code, X86_CC_P, 0, FALSE);
3670 /* branch if C0=0 */
3671 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3672 amd64_patch (br1, code);
3675 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1));
3676 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
3677 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3680 /* Branch if C013 != 001 */
3681 if (cfg->opt & MONO_OPT_FCMOV) {
3682 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3683 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3686 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3687 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3689 case CEE_CKFINITE: {
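/* NaN and infinity both leave C0 set with C3 clear in the FPU status
 * word, so the masked compare below throws ArithmeticException for
 * either class of value. */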
3690 amd64_push_reg (code, AMD64_RAX);
3692 amd64_fnstsw (code);
3693 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
3694 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3695 amd64_pop_reg (code, AMD64_RAX);
3696 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3700 g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3701 g_assert_not_reached ();
3704 if ((code - cfg->native_code - offset) > max_len) {
3705 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
3706 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3707 g_assert_not_reached ();
3713 last_offset = offset;
3718 cfg->code_len = code - cfg->native_code;
3722 mono_arch_register_lowlevel_calls (void)
3727 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3729 MonoJumpInfo *patch_info;
3731 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3732 unsigned char *ip = patch_info->ip.i + code;
3733 const unsigned char *target;
3735 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
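/* Most cases below patch the 64-bit immediate of a REX-prefixed mov,
 * which starts two bytes into the instruction (hence ip + 2). */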
3737 switch (patch_info->type) {
3738 case MONO_PATCH_INFO_METHOD_REL:
3739 *((gconstpointer *)(ip)) = target;
3741 case MONO_PATCH_INFO_SWITCH: {
3742 *((gconstpointer *)(ip + 2)) = target;
3745 case MONO_PATCH_INFO_IID:
3746 *((guint32 *)(ip + 1)) = (guint32)target;
3748 case MONO_PATCH_INFO_CLASS_INIT: {
3750 /* Might already have been changed to a nop */
3751 amd64_call_imm (code, 0);
3754 case MONO_PATCH_INFO_R4:
3755 case MONO_PATCH_INFO_R8:
3756 *((gconstpointer *)(ip + 2)) = target;
3758 case MONO_PATCH_INFO_METHODCONST:
3759 case MONO_PATCH_INFO_CLASS:
3760 case MONO_PATCH_INFO_IMAGE:
3761 case MONO_PATCH_INFO_FIELD:
3762 case MONO_PATCH_INFO_VTABLE:
3763 case MONO_PATCH_INFO_SFLDA:
3764 case MONO_PATCH_INFO_EXC_NAME:
3765 case MONO_PATCH_INFO_LDSTR:
3766 case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
3767 case MONO_PATCH_INFO_LDTOKEN:
3768 case MONO_PATCH_INFO_IP:
3769 *((gconstpointer *)(ip + 2)) = target;
3771 case MONO_PATCH_INFO_METHOD:
3772 case MONO_PATCH_INFO_ABS:
3773 case MONO_PATCH_INFO_INTERNAL_METHOD:
3774 *((gconstpointer *)(ip + 2)) = target;
3779 amd64_patch (ip, (gpointer)target);
3784 mono_arch_max_epilog_size (MonoCompile *cfg)
3786 int exc_count = 0, max_epilog_size = 16;
3787 MonoJumpInfo *patch_info;
3789 if (cfg->method->save_lmf)
3790 max_epilog_size += 128;
3792 if (mono_jit_trace_calls != NULL)
3793 max_epilog_size += 50;
3795 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3796 max_epilog_size += 50;
3798 max_epilog_size += (AMD64_NREG * 2);
3800 /* count the number of exception infos */
3802 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3803 if (patch_info->type == MONO_PATCH_INFO_EXC)
3808 * make sure we have enough space for exceptions
3809 * 16 is the size of two push_imm instructions and a call
3811 max_epilog_size += exc_count*16;
3813 return max_epilog_size;
3817 mono_arch_emit_prolog (MonoCompile *cfg)
3819 MonoMethod *method = cfg->method;
3821 MonoMethodSignature *sig;
3823 int alloc_size, pos, max_offset, i;
3827 cfg->code_size = MAX (((MonoMethodNormal *)method)->header->code_size * 4, 256);
3828 code = cfg->native_code = g_malloc (cfg->code_size);
3830 amd64_push_reg (code, AMD64_RBP);
3831 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
3833 alloc_size = cfg->stack_offset;
3836 if (method->save_lmf) {
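/* Build a MonoLMF structure on the stack and link it into the thread's
 * LMF chain, so stack walks can cross the managed/native boundary. */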
3839 pos += sizeof (MonoLMF);
3841 /* save the current IP */
3842 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3843 amd64_set_reg_template (code, GP_SCRATCH_REG);
3844 amd64_push_reg (code, GP_SCRATCH_REG);
3846 /* save all callee saved regs */
3847 amd64_push_reg (code, AMD64_RBP);
3848 amd64_push_reg (code, AMD64_RBX);
3849 amd64_push_reg (code, AMD64_R12);
3850 amd64_push_reg (code, AMD64_R13);
3851 amd64_push_reg (code, AMD64_R14);
3852 amd64_push_reg (code, AMD64_R15);
3854 /* save method info */
3855 amd64_mov_reg_imm (code, GP_SCRATCH_REG, method);
3856 amd64_push_reg (code, GP_SCRATCH_REG);
3858 /* get the address of lmf for the current thread */
3860 * This is performance critical, so we try to use some tricks to make it fast.
3863 if (lmf_tls_offset != -1) {
3866 /* Load the lmf address quickly using the GS register */
3867 amd64_prefix (code, X86_GS_PREFIX);
3868 amd64_mov_reg_mem (code, AMD64_RAX, 0, 4);
3869 amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RAX, lmf_tls_offset, 4);
3872 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD,
3873 (gpointer)"mono_get_lmf_addr");
3878 amd64_push_reg (code, AMD64_RAX);
3879 /* push *lmf (previous_lmf) */
3880 amd64_push_membase (code, AMD64_RAX, 0);
3882 amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_RSP, sizeof (gpointer));
3886 for (i = 0; i < AMD64_NREG; ++i)
3887 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3888 amd64_push_reg (code, i);
3889 pos += sizeof (gpointer);
3896 /* See mono_emit_stack_alloc */
3897 #ifdef PLATFORM_WIN32
3898 guint32 remaining_size = alloc_size;
3899 while (remaining_size >= 0x1000) {
3900 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
3901 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
3902 remaining_size -= 0x1000;
3905 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
3907 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
3911 /* compute max_offset in order to use short forward jumps */
3913 if (cfg->opt & MONO_OPT_BRANCH) {
3914 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3915 MonoInst *ins = bb->code;
3916 bb->max_offset = max_offset;
3918 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3920 /* max alignment for loops */
3921 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3922 max_offset += LOOP_ALIGNMENT;
3925 if (ins->opcode == OP_LABEL)
3926 ins->inst_c1 = max_offset;
3928 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3934 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3935 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3937 sig = method->signature;
3940 cinfo = get_call_info (sig, FALSE);
3942 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3943 ArgInfo *ainfo = cinfo->args + i;
3944 gint32 stack_offset;
3946 inst = cfg->varinfo [i];
3948 if (sig->hasthis && (i == 0))
3949 arg_type = &mono_defaults.object_class->byval_arg;
3951 arg_type = sig->params [i - sig->hasthis];
3953 stack_offset = ainfo->offset + ARGS_OFFSET;
3955 if ((ainfo->storage == ArgInIReg) && (inst->opcode != OP_REGVAR)) {
3956 /* Argument in register, but needs to be saved to the stack */
3957 amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg, sizeof (gpointer));
3959 if ((ainfo->storage == ArgInSSEReg) && (inst->opcode != OP_REGVAR)) {
3960 /* Argument in register, but needs to be saved to the stack */
3961 amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
3964 if (inst->opcode == OP_REGVAR) {
3965 /* Argument allocated to (non-volatile) register */
3972 cfg->code_len = code - cfg->native_code;
3978 mono_arch_emit_epilog (MonoCompile *cfg)
3980 MonoJumpInfo *patch_info;
3981 MonoMethod *method = cfg->method;
3982 MonoMethodSignature *sig = method->signature;
3984 guint32 stack_to_pop;
3987 code = cfg->native_code + cfg->code_len;
3989 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3990 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3992 /* the code restoring the registers must be kept in sync with CEE_JMP */
3995 if (method->save_lmf) {
3998 gint32 prev_lmf_reg;
4000 /* Find a spare register */
4001 prev_lmf_reg = GP_SCRATCH_REG;
4003 /* reg = previous_lmf */
4004 amd64_mov_reg_membase (code, prev_lmf_reg, AMD64_RBP, -32, sizeof (gpointer));
4007 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, -28, sizeof (gpointer));
4009 /* *(lmf) = previous_lmf */
4010 amd64_mov_membase_reg (code, AMD64_RCX, 0, prev_lmf_reg, sizeof (gpointer));
4012 /* restore callee saved regs */
4013 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
4014 amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RBP, -20, sizeof (gpointer));
4017 if (cfg->used_int_regs & (1 << AMD64_RDI)) {
4018 amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RBP, -16, sizeof (gpointer));
4020 if (cfg->used_int_regs & (1 << AMD64_RSI)) {
4021 amd64_mov_reg_membase (code, AMD64_RSI, AMD64_RBP, -12, sizeof (gpointer));
4025 /* RBP is restored by LEAVE */
4028 for (i = 0; i < AMD64_NREG; ++i)
4029 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4030 pos -= sizeof (gpointer);
4033 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4035 /* Pop registers in reverse order */
4036 for (i = AMD64_NREG - 1; i > 0; --i)
4037 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4038 amd64_pop_reg (code, i);
4044 if (CALLCONV_IS_STDCALL (sig->call_convention)) {
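/* Under stdcall the callee removes its own arguments, so compute the
 * number of incoming argument bytes to pop with ret imm. */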
4045 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4047 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4048 } else if (MONO_TYPE_ISSTRUCT (cfg->method->signature->ret))
4054 amd64_ret_imm (code, stack_to_pop);
4058 /* add code to raise exceptions */
4059 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4060 switch (patch_info->type) {
4061 case MONO_PATCH_INFO_EXC:
4062 amd64_patch (patch_info->ip.i + cfg->native_code, code);
4063 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
4064 amd64_push_imm (code, patch_info->data.target);
4065 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_METHOD_REL, (gpointer)patch_info->ip.i);
4066 amd64_push_imm (code, patch_info->ip.i + cfg->native_code);
4067 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4068 patch_info->data.name = "mono_arch_throw_exception_by_name";
4069 patch_info->ip.i = code - cfg->native_code;
4070 amd64_jump_code (code, 0);
4078 cfg->code_len = code - cfg->native_code;
4080 g_assert (cfg->code_len < cfg->code_size);
4085 mono_arch_flush_icache (guint8 *code, gint size)
4091 mono_arch_flush_register_windows (void)
4096 mono_arch_is_inst_imm (gint64 imm)
4098 return amd64_is_imm32 (imm);
4102 * Support for fast access to the thread-local lmf structure using the GS
4103 * segment register on NPTL + kernel 2.6.x.
4106 static gboolean tls_offset_inited = FALSE;
4109 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4111 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4112 pthread_t self = pthread_self();
4113 pthread_attr_t attr;
4114 void *staddr = NULL;
4116 struct sigaltstack sa;
4119 if (!tls_offset_inited) {
4122 tls_offset_inited = TRUE;
4124 code = (guint8*)mono_get_lmf_addr;
4126 if (getenv ("MONO_NPTL")) {
4128 * Determine the offset of mono_get_lmf_addr inside the TLS structures
4129 * by disassembling the function above.
4132 /* This is generated by gcc 3.3.2 */
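/* Matches: push %ebp; mov %esp,%ebp; mov %gs:0x0,%eax;
 * mov <disp32>(%eax),%eax - the 32-bit displacement is the offset. */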
4133 if ((code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
4134 (code [3] == 0x65) && (code [4] == 0xa1) && (code [5] == 0x00) &&
4135 (code [6] == 0x00) && (code [7] == 0x00) && (code [8] == 0x00) &&
4136 (code [9] == 0x8b) && (code [10] == 0x80)) {
4137 lmf_tls_offset = *(int*)&(code [11]);
4140 /* This is generated by gcc-3.4 */
4141 if ((code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
4142 (code [3] == 0x65) && (code [4] == 0xa1)) {
4143 lmf_tls_offset = *(int*)&(code [5]);
4148 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4150 /* Determine stack boundaries */
4151 if (!mono_running_on_valgrind ()) {
4152 #ifdef HAVE_PTHREAD_GETATTR_NP
4153 pthread_getattr_np( self, &attr );
4155 #ifdef HAVE_PTHREAD_ATTR_GET_NP
4156 pthread_attr_get_np( self, &attr );
4158 pthread_attr_init( &attr );
4159 pthread_attr_getstacksize( &attr, &stsize );
4161 #error "Not implemented"
4165 pthread_attr_getstack( &attr, &staddr, &stsize );
4170 * staddr seems to be wrong for the main thread, so we keep the value in
4173 tls->stack_size = stsize;
4175 /* Set up an alternate signal stack */
4176 tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
4177 tls->signal_stack_size = SIGNAL_STACK_SIZE;
4179 sa.ss_sp = tls->signal_stack;
4180 sa.ss_size = SIGNAL_STACK_SIZE;
4181 sa.ss_flags = SS_ONSTACK;
4182 sigaltstack (&sa, NULL);
4187 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4189 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4190 struct sigaltstack sa;
4192 sa.ss_sp = tls->signal_stack;
4193 sa.ss_size = SIGNAL_STACK_SIZE;
4194 sa.ss_flags = SS_DISABLE;
4195 sigaltstack (&sa, NULL);
4197 if (tls->signal_stack)
4198 g_free (tls->signal_stack);
4203 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4206 /* add the this argument */
4207 if (this_reg != -1) {
4209 MONO_INST_NEW (cfg, this, OP_OUTARG);
4210 this->type = this_type;
4211 this->sreg1 = this_reg;
4212 mono_bblock_add_inst (cfg->cbb, this);
4217 MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4218 vtarg->type = STACK_MP;
4219 vtarg->sreg1 = vt_reg;
4220 mono_bblock_add_inst (cfg->cbb, vtarg);
4226 mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4228 if (cmethod->klass == mono_defaults.math_class) {
4229 if (strcmp (cmethod->name, "Sin") == 0)
4231 else if (strcmp (cmethod->name, "Cos") == 0)
4233 else if (strcmp (cmethod->name, "Tan") == 0)
4235 else if (strcmp (cmethod->name, "Atan") == 0)
4237 else if (strcmp (cmethod->name, "Sqrt") == 0)
4239 else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8)
4242 /* OP_FREM is not IEEE compatible */
4243 else if (strcmp (cmethod->name, "IEEERemainder") == 0)
4256 mono_arch_print_tree (MonoInst *tree, int arity)