/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-amd64.h"
#include "inssel.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

static gboolean use_sse2 = !MONO_ARCH_USE_FPSTACK;

const char * const amd64_desc [OP_LAST];
static const char * const * ins_spec = amd64_desc;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)
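/*
 * Illustrative values (not from the original source): ALIGN_TO rounds up
 * to the next multiple of a power-of-two alignment, and IS_IMM32 checks
 * whether a value fits in the zero-extended low 32 bits:
 *
 *   ALIGN_TO (13, 8)          -> 16
 *   ALIGN_TO (16, 8)          -> 16
 *   IS_IMM32 (0x7fffffffLL)   -> TRUE
 *   IS_IMM32 (0x100000000LL)  -> FALSE
 */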

#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

#define SIGNAL_STACK_SIZE (64 * 1024)

#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * UNORDERED        1  1  1
 */
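/*
 * A worked example, assuming the flags above come from a ucomisd-style
 * compare: PF is set only in the UNORDERED row, so a "jp" branch isolates
 * the NaN case, while "ja" (CF=0 and ZF=0) is true only for A > B and is
 * therefore safe in the presence of NaNs.
 */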

#define NOT_IMPLEMENTED g_assert_not_reached ()

const char*
mono_arch_regname (int reg) {
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static const char * xmmregs [] = {
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8",
        "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

const char*
mono_arch_fregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return xmmregs [reg];
        else
                return "unknown";
}

static inline void
amd64_patch (unsigned char* code, gpointer target)
{
        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        if ((code [0] & 0xf8) == 0xb8) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else if (code [0] == 0x8b) {
                /* mov 0(%rip), %dreg */
                *(guint32*)(code + 2) = (guint32)(guint64)target - 7;
        }
        else if ((code [0] == 0xff) && (code [1] == 0x15)) {
                /* call *<OFFSET>(%rip) */
                *(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
        }
        else if ((code [0] == 0xe8)) {
                /* call <DISP> */
                gint64 disp = (guint8*)target - (guint8*)code;
                g_assert (amd64_is_imm32 (disp));
                x86_patch (code, (unsigned char*)target);
        }
        else
                x86_patch (code, (unsigned char*)target);
}
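/*
 * Sketch of the common case (illustrative): amd64_set_reg_template emits
 * "mov $imm64, %reg", i.e. a REX prefix followed by an opcode in the
 * 0xb8-0xbf range. After the REX byte is skipped above, the 64 bit target
 * is stored directly into the immediate field at code + 1.
 */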

typedef enum {
        ArgInIReg,
        ArgInFloatSSEReg,
        ArgInDoubleSSEReg,
        ArgOnStack,
        ArgValuetypeInReg,
        ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
        gint16 offset;
        gint8  reg;
        ArgStorage storage;

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];
        gint8 pair_regs [2];
} ArgInfo;

typedef struct {
        int nargs;
        guint32 stack_usage;
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
        ArgInfo ret;
        ArgInfo sig_cookie;
        ArgInfo args [1];
} CallInfo;
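/*
 * Example (illustrative, per the SysV rules implemented below): for
 * "int f (int a, double b)", get_call_info () yields
 *   ret.storage      == ArgInIReg         (reg == AMD64_RAX)
 *   args [0].storage == ArgInIReg         (reg == AMD64_RDI)
 *   args [1].storage == ArgInDoubleSSEReg (reg == 0, i.e. %xmm0)
 */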

#define DEBUG(a) if (cfg->verbose_level > 1) a

#define NEW_ICONST(cfg,dest,val) do {   \
                (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
                (dest)->opcode = OP_ICONST;     \
                (dest)->inst_c0 = (val);        \
                (dest)->type = STACK_I4;        \
        } while (0)

#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
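/*
 * Example: for an integer-only call f (a, b, c), add_general () below
 * assigns a -> %rdi, b -> %rsi and c -> %rdx; the seventh and later
 * integer arguments fall back to ArgOnStack.
 */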

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}

#define FLOAT_PARAM_REGS 8

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                /* A double register */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2 = ARG_CLASS_NO_CLASS;
        MonoType *ptype;

        ptype = mono_type_get_underlying_type (type);
        switch (ptype->type) {
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_CHAR:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
                class2 = ARG_CLASS_SSE;
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_VALUETYPE: {
                MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
                int i;

                for (i = 0; i < info->num_fields; ++i) {
                        class2 = class1;
                        class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                }
                break;
        }
        default:
                g_assert_not_reached ();
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, quad, nquads, i;
        ArgumentClass args [2];
        MonoMarshalType *info;
        MonoClass *klass;

        klass = mono_class_from_mono_type (type);
        if (sig->pinvoke)
                size = mono_type_native_stack_size (&klass->byval_arg, NULL);
        else
                size = mono_type_stack_size (&klass->byval_arg, NULL);

        if (!sig->pinvoke || (size == 0) || (size > 16)) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        /* FIXME: Handle structs smaller than 8 bytes */
        //if ((size % 8) != 0)
        //      NOT_IMPLEMENTED;

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        /*
         * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
         * The X87 and SSEUP stuff is left out since there are no such types in
         * the CLR.
         */
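        /*
         * Worked example (illustrative): struct { double d; long l; } is 16
         * bytes, so nquads == 2; quad 0 classifies as ARG_CLASS_SSE (the
         * double) and quad 1 as ARG_CLASS_INTEGER (the long), so the value
         * travels in one XMM register and one general purpose register.
         */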
        info = mono_marshal_load_type_info (klass);
        g_assert (info);
        if (info->native_size > 16) {
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (info->native_size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        for (quad = 0; quad < nquads; ++quad) {
                int size, align;
                ArgumentClass class1;

                class1 = ARG_CLASS_NO_CLASS;
                for (i = 0; i < info->num_fields; ++i) {
                        size = mono_marshal_type_size (info->fields [i].field->type,
                                                       info->fields [i].mspec,
                                                       &align, TRUE, klass->unicode);
                        if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
                                /* Unaligned field */
                                NOT_IMPLEMENTED;
                        }

                        /* Skip fields in other quad */
                        if ((quad == 0) && (info->fields [i].offset >= 8))
                                continue;
                        if ((quad == 1) && (info->fields [i].offset < 8))
                                continue;

                        class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
                }
                g_assert (class1 != ARG_CLASS_NO_CLASS);
                args [quad] = class1;
        }

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = ArgOnStack;
                }
        }
}

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
        guint32 i, gr, fr;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;

        cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        gr = 0;
        fr = 0;

        /* return value */
        {
                ret_type = mono_type_get_underlying_type (sig->ret);
                switch (ret_type->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_STRING:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_R4:
                        cinfo->ret.storage = ArgInFloatSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_R8:
                        cinfo->ret.storage = ArgInDoubleSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_VALUETYPE: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                        add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                        if (cinfo->ret.storage == ArgOnStack)
                                /* The caller passes the address where the value is stored */
                                add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                }
                case MONO_TYPE_TYPEDBYREF:
                        /* Same as a valuetype with size 24 */
                        add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                case MONO_TYPE_VOID:
                        break;
                default:
                        g_error ("Can't handle as return value 0x%x", sig->ret->type);
                }
        }

        /* this */
        if (sig->hasthis)
                add_general (&gr, &stack_size, cinfo->args + 0);

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = 0; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                if (sig->params [i]->byref) {
                        add_general (&gr, &stack_size, ainfo);
                        continue;
                }
                ptype = mono_type_get_underlying_type (sig->params [i]);
                switch (ptype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_VALUETYPE:
                        add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_TYPEDBYREF:
                        stack_size += sizeof (MonoTypedRef);
                        ainfo->storage = ArgOnStack;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        if (stack_size & 0x8) {
                /* The AMD64 ABI requires each stack frame to be 16 byte aligned */
                cinfo->need_stack_align = TRUE;
                stack_size += 8;
        }
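        /*
         * Example: three stack arguments occupy 24 bytes; 24 & 0x8 != 0, so
         * 8 bytes of padding are added and stack_usage becomes 32, a
         * multiple of 16 as the ABI requires.
         */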

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;
        CallInfo *cinfo = get_call_info (csig, FALSE);
        guint32 args_size = cinfo->stack_usage;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        g_free (cinfo);

        return args_size;
}

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
        return 0;
}

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
}
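/*
 * Note (illustrative): with X86_FPCW_PREC_DOUBLE set, x87 operations round
 * their results to a 53 bit mantissa, matching IEEE double and therefore
 * the semantics the CLR expects for System.Double.
 */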

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
        int eax, ebx, ecx, edx;
        guint32 opts = 0;

        /* FIXME: AMD64 */

        *exclude_mask = 0;
        /* Feature Flags function, flags returned in EDX. */
        if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
                if (edx & (1 << 15)) {
                        opts |= MONO_OPT_CMOV;
                        if (edx & 1)
                                opts |= MONO_OPT_FCMOV;
                        else
                                *exclude_mask |= MONO_OPT_FCMOV;
                } else
                        *exclude_mask |= MONO_OPT_CMOV;
        }
        return opts;
}

gboolean
mono_amd64_is_sse2 (void)
{
        return use_sse2;
}

static gboolean
is_regsize_var (MonoType *t) {
        if (t->byref)
                return TRUE;
        t = mono_type_get_underlying_type (t);
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
                return TRUE;
        case MONO_TYPE_VALUETYPE:
                return FALSE;
        }
        return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                /* we don't allocate I1 to registers because there is no simple way to sign extend
                 * 8bit quantities in caller saved registers on x86 */
                if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
                    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
                    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
        GList *regs = NULL;

        /* We use the callee saved registers for global allocation */
        regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
        regs = g_list_prepend (regs, (gpointer)AMD64_R12);
        regs = g_list_prepend (regs, (gpointer)AMD64_R13);
        regs = g_list_prepend (regs, (gpointer)AMD64_R14);
        regs = g_list_prepend (regs, (gpointer)AMD64_R15);

        return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
        MonoInst *ins = cfg->varinfo [vmv->idx];

        if (cfg->method->save_lmf)
                /* The register is already saved */
                /* subtract 1 for the invisible store in the prolog */
                return (ins->opcode == OP_ARG) ? 0 : 1;
        else
                /* push+pop */
                return (ins->opcode == OP_ARG) ? 1 : 2;
}

void
mono_arch_allocate_vars (MonoCompile *m)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *inst;
        int i, offset;
        guint32 locals_stack_size, locals_stack_align;
        gint32 *offsets;
        CallInfo *cinfo;

        header = mono_method_get_header (m->method);

        sig = mono_method_signature (m->method);

        cinfo = get_call_info (sig, FALSE);

        /*
         * We use the ABI calling conventions for managed code as well.
         * Exception: valuetypes are never passed or returned in registers.
         */

        /* Locals are allocated backwards from %fp */
        m->frame_reg = AMD64_RBP;
        offset = 0;

        /* Reserve space for callee saved registers */
        for (i = 0; i < AMD64_NREG; ++i)
                if (AMD64_IS_CALLEE_SAVED_REG (i) && (m->used_int_regs & (1 << i))) {
                        offset += sizeof (gpointer);
                }

        if (m->method->save_lmf) {
                /* Reserve stack space for saving LMF + argument regs */
                offset += sizeof (MonoLMF);
                if (lmf_tls_offset == -1)
                        /* Need to save argument regs too */
                        offset += (AMD64_NREG * 8) + (8 * 8);
                m->arch.lmf_offset = offset;
        }

        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
                                /* The register is volatile */
                                m->ret->opcode = OP_REGOFFSET;
                                m->ret->inst_basereg = AMD64_RBP;
                                offset += 8;
                                m->ret->inst_offset = - offset;
                        }
                        else {
                                m->ret->opcode = OP_REGVAR;
                                m->ret->inst_c0 = cinfo->ret.reg;
                        }
                        break;
                case ArgValuetypeInReg:
                        /* Allocate a local to hold the result, the epilog will copy it to the correct place */
                        offset += 16;
                        m->ret->opcode = OP_REGOFFSET;
                        m->ret->inst_basereg = AMD64_RBP;
                        m->ret->inst_offset = - offset;
                        break;
                default:
                        g_assert_not_reached ();
                }
                m->ret->dreg = m->ret->inst_c0;
        }

        /* Allocate locals */
        offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
        if (locals_stack_align) {
                offset += (locals_stack_align - 1);
                offset &= ~(locals_stack_align - 1);
        }
        for (i = m->locals_start; i < m->num_varinfo; i++) {
                if (offsets [i] != -1) {
                        MonoInst *inst = m->varinfo [i];
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = AMD64_RBP;
                        inst->inst_offset = - (offset + offsets [i]);
                        //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
                }
        }
        g_free (offsets);
        offset += locals_stack_size;

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                m->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                inst = m->varinfo [i];
                if (inst->opcode != OP_REGVAR) {
                        ArgInfo *ainfo = &cinfo->args [i];
                        gboolean inreg = TRUE;
                        MonoType *arg_type;

                        if (sig->hasthis && (i == 0))
                                arg_type = &mono_defaults.object_class->byval_arg;
                        else
                                arg_type = sig->params [i - sig->hasthis];

                        /* FIXME: Allocate volatile arguments to registers */
                        if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
                                inreg = FALSE;

                        /*
                         * Under AMD64, all registers used to pass arguments to functions
                         * are volatile across calls.
                         * FIXME: Optimize this.
                         */
                        if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg))
                                inreg = FALSE;

                        inst->opcode = OP_REGOFFSET;

                        switch (ainfo->storage) {
                        case ArgInIReg:
                        case ArgInFloatSSEReg:
                        case ArgInDoubleSSEReg:
                                inst->opcode = OP_REGVAR;
                                inst->dreg = ainfo->reg;
                                break;
                        case ArgOnStack:
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = AMD64_RBP;
                                inst->inst_offset = ainfo->offset + ARGS_OFFSET;
                                break;
                        case ArgValuetypeInReg:
                                break;
                        default:
                                NOT_IMPLEMENTED;
                        }

                        if (!inreg && (ainfo->storage != ArgOnStack)) {
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = AMD64_RBP;
                                /* These arguments are saved to the stack in the prolog */
                                if (ainfo->storage == ArgValuetypeInReg)
                                        offset += 2 * sizeof (gpointer);
                                else
                                        offset += sizeof (gpointer);
                                inst->inst_offset = - offset;
                        }
                }
        }

        m->stack_offset = offset;

        g_free (cinfo);
}

void
mono_arch_create_vars (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        CallInfo *cinfo;

        sig = mono_method_signature (cfg->method);

        cinfo = get_call_info (sig, FALSE);

        if (cinfo->ret.storage == ArgValuetypeInReg)
                cfg->ret_var_is_local = TRUE;

        g_free (cinfo);
}

static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage storage, int reg, MonoInst *tree)
{
        switch (storage) {
        case ArgInIReg:
                arg->opcode = OP_OUTARG_REG;
                arg->inst_left = tree;
                arg->inst_right = (MonoInst*)call;
                arg->unused = reg;
                call->used_iregs |= 1 << reg;
                break;
        case ArgInFloatSSEReg:
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R4;
                arg->inst_left = tree;
                arg->inst_right = (MonoInst*)call;
                arg->unused = reg;
                call->used_fregs |= 1 << reg;
                break;
        case ArgInDoubleSSEReg:
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R8;
                arg->inst_left = tree;
                arg->inst_right = (MonoInst*)call;
                arg->unused = reg;
                call->used_fregs |= 1 << reg;
                break;
        default:
                g_assert_not_reached ();
        }
}

/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

static int
arg_storage_to_ldind (ArgStorage storage)
{
        switch (storage) {
        case ArgInIReg:
                return CEE_LDIND_I;
        case ArgInDoubleSSEReg:
                return CEE_LDIND_R8;
        case ArgInFloatSSEReg:
                return CEE_LDIND_R4;
        default:
                g_assert_not_reached ();
        }

        return -1;
}

/*
 * Take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
        int i, n, stack_size;
        CallInfo *cinfo;
        ArgInfo *ainfo;

        stack_size = 0;

        sig = call->signature;
        n = sig->param_count + sig->hasthis;

        cinfo = get_call_info (sig, sig->pinvoke);

        for (i = 0; i < n; ++i) {
                ainfo = cinfo->args + i;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        MonoMethodSignature *tmp_sig;

                        /* Emit the signature cookie just before the implicit arguments */
                        MonoInst *sig_arg;
                        /* FIXME: Add support for signature tokens to AOT */
                        cfg->disable_aot = TRUE;

                        g_assert (cinfo->sig_cookie.storage == ArgOnStack);

                        /*
                         * mono_ArgIterator_Setup assumes the signature cookie is
                         * passed first and all the arguments which were before it are
                         * passed on the stack after the signature. So compensate by
                         * passing a different signature.
                         */
                        tmp_sig = mono_metadata_signature_dup (call->signature);
                        tmp_sig->param_count -= call->signature->sentinelpos;
                        tmp_sig->sentinelpos = 0;
                        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

                        MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
                        sig_arg->inst_p0 = tmp_sig;

                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        arg->inst_left = sig_arg;
                        arg->type = STACK_PTR;

                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;
                }

                if (is_virtual && i == 0) {
                        /* the argument will be attached to the call instruction */
                        in = call->args [i];
                } else {
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        in = call->args [i];
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
                        arg->type = in->type;
                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;

                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT (sig->params [i - sig->hasthis]))) {
                                gint align;
                                guint32 size;

                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
                                        size = sizeof (MonoTypedRef);
                                        align = sizeof (gpointer);
                                }
                                else if (sig->pinvoke)
                                        size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
                                else {
                                        /*
                                         * Other backends use mono_type_stack_size (), but that
                                         * aligns the size to 8, which is larger than the size of
                                         * the source, leading to reads of invalid memory if the
                                         * source is at the end of address space.
                                         */
                                        size = mono_class_value_size (in->klass, &align);
                                }
                                if (ainfo->storage == ArgValuetypeInReg) {
                                        if (ainfo->pair_storage [1] == ArgNone) {
                                                MonoInst *load;

                                                /* Simpler case */

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = in;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);
                                        }
                                        else {
                                                /* Trees can't be shared so make a copy */
                                                MonoInst *vtaddr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                                                MonoInst *load, *load2, *offset_ins;

                                                /* Reg1 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->ssa_op = MONO_SSA_LOAD;
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

                                                NEW_ICONST (cfg, offset_ins, 0);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = load2;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);

                                                /* Reg2 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->ssa_op = MONO_SSA_LOAD;
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

                                                NEW_ICONST (cfg, offset_ins, 8);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [1]));
                                                load->inst_left = load2;

                                                MONO_INST_NEW (cfg, arg, OP_OUTARG);
                                                arg->cil_code = in->cil_code;
                                                arg->type = in->type;
                                                /* prepend, so they get reversed */
                                                arg->next = call->out_args;
                                                call->out_args = arg;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1], load);

                                                /* Prepend a copy inst */
                                                MONO_INST_NEW (cfg, arg, CEE_STIND_I);
                                                arg->cil_code = in->cil_code;
                                                arg->ssa_op = MONO_SSA_STORE;
                                                arg->inst_left = vtaddr;
                                                arg->inst_right = in;
                                                arg->type = in->type;

                                                /* prepend, so they get reversed */
                                                arg->next = call->out_args;
                                                call->out_args = arg;
                                        }
                                }
                                else {
                                        arg->opcode = OP_OUTARG_VT;
                                        arg->klass = in->klass;
                                        arg->unused = sig->pinvoke;
                                        arg->inst_imm = size;
                                }
                        }
                        else {
                                switch (ainfo->storage) {
                                case ArgInIReg:
                                        add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
                                        break;
                                case ArgInFloatSSEReg:
                                case ArgInDoubleSSEReg:
                                        add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
                                        break;
                                case ArgOnStack:
                                        arg->opcode = OP_OUTARG;
                                        if (!sig->params [i - sig->hasthis]->byref) {
                                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4)
                                                        arg->opcode = OP_OUTARG_R4;
                                                else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8)
                                                        arg->opcode = OP_OUTARG_R8;
                                        }
                                        break;
                                default:
                                        g_assert_not_reached ();
                                }
                        }
                }
        }

        if (cinfo->need_stack_align) {
                MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
                /* prepend, so they get reversed */
                arg->next = call->out_args;
                call->out_args = arg;
        }

        call->stack_usage = cinfo->stack_usage;
        cfg->param_area = MAX (cfg->param_area, call->stack_usage);
        cfg->flags |= MONO_CFG_HAS_CALLS;

        g_free (cinfo);

        return call;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}

/* emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,  \
                                     MONO_PATCH_INFO_EXC, exc_name); \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0)

#define EMIT_FPCOMPARE(code) do { \
        amd64_fcompp (code); \
        amd64_fnstsw (code); \
} while (0)

#define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
        amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
        amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
        amd64_ ##op (code); \
        amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
        amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
} while (0)

static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
        mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);

        if (cfg->compile_aot) {
                amd64_call_membase (code, AMD64_RIP, 0);
        }
        else {
                gboolean near_call = FALSE;

                /*
                 * Indirect calls are expensive, so try to make a near call if possible.
                 * The caller's memory is allocated by the code manager, so it is
                 * guaranteed to be at a 32 bit offset.
                 */
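                /*
                 * Example: the near path below emits a direct "call rel32"
                 * (opcode 0xe8), which reaches targets within +/-2GB of the
                 * call site; the far path materializes the full 64 bit
                 * address in GP_SCRATCH_REG (%r11) and emits an indirect
                 * "call *%r11".
                 */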

                if (patch_type != MONO_PATCH_INFO_ABS) {
                        /* The target is in memory allocated using the code manager */
                        near_call = TRUE;

                        if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
                                if (((MonoMethod*)data)->klass->image->assembly->aot_module)
                                        /* The callee might be an AOT method */
                                        near_call = FALSE;
                        }

                        if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
                                /*
                                 * The call might go directly to a native function without
                                 * the wrapper.
                                 */
                                MonoJitICallInfo *mi = mono_find_jit_icall_by_name (data);
                                if (mi) {
                                        gconstpointer target = mono_icall_get_wrapper (mi);
                                        if ((((guint64)target) >> 32) != 0)
                                                near_call = FALSE;
                                }
                        }
                }
                else {
                        if (mono_find_class_init_trampoline_by_addr (data))
                                near_call = TRUE;
                        else {
                                MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
                                if (info) {
                                        if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) &&
                                                strstr (cfg->method->name, info->name)) {
                                                /* A call to the wrapped function */
                                                if ((((guint64)data) >> 32) == 0)
                                                        near_call = TRUE;
                                        }
                                        else if (info->func == info->wrapper) {
                                                /* No wrapper */
                                                if ((((guint64)info->func) >> 32) == 0)
                                                        near_call = TRUE;
                                        }
                                        else
                                                near_call = TRUE;
                                }
                                else if ((((guint64)data) >> 32) == 0)
                                        near_call = TRUE;
                        }
                }

                if (cfg->method->dynamic)
                        /* These methods are allocated using malloc */
                        near_call = FALSE;

                if (near_call) {
                        amd64_call_code (code, 0);
                }
                else {
                        amd64_set_reg_template (code, GP_SCRATCH_REG);
                        amd64_call_reg (code, GP_SCRATCH_REG);
                }
        }

        return code;
}

1344 /* FIXME: Add more instructions */
1345 #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI8_MEMBASE_REG) || ((ins)->opcode == OP_MOVE) || ((ins)->opcode == OP_SETREG) || ((ins)->opcode == OP_ICONST) || ((ins)->opcode == OP_I8CONST) || ((ins)->opcode == OP_LOAD_MEMBASE))
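/*
 * Rationale, roughly: the peephole pass below rewrites "reg = 0" into
 * "xor reg, reg", which clobbers the condition flags, so that rewrite is
 * only legal when the following instruction is known not to read them.
 */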
1346
1347 static void
1348 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1349 {
1350         MonoInst *ins, *last_ins = NULL;
1351         ins = bb->code;
1352
1353         while (ins) {
1354
1355                 switch (ins->opcode) {
1356                 case OP_ICONST:
1357                 case OP_I8CONST:
1358                         /* reg = 0 -> XOR (reg, reg) */
1359                         /* XOR sets the condition flags, so we can't always do it */
1360                         if (ins->inst_c0 == 0 && (ins->next && INST_IGNORES_CFLAGS (ins->next))) {
1361                                 ins->opcode = CEE_XOR;
1362                                 ins->sreg1 = ins->dreg;
1363                                 ins->sreg2 = ins->dreg;
1364                         }
1365                         break;
1366                 case OP_MUL_IMM: 
1367                         /* remove unnecessary multiplication with 1 */
1368                         if (ins->inst_imm == 1) {
1369                                 if (ins->dreg != ins->sreg1) {
1370                                         ins->opcode = OP_MOVE;
1371                                 } else if (last_ins) {
1372                                         last_ins->next = ins->next;
1373                                         ins = ins->next;
1374                                         continue;
1375                                 }
1376                         }
1377                         break;
1378                 case OP_COMPARE_IMM:
1379                         /* OP_COMPARE_IMM (reg, 0) 
1380                          * --> 
1381                          * OP_AMD64_TEST_NULL (reg) 
1382                          */
1383                         if (!ins->inst_imm)
1384                                 ins->opcode = OP_AMD64_TEST_NULL;
1385                         break;
1386                 case OP_ICOMPARE_IMM:
1387                         if (!ins->inst_imm)
1388                                 ins->opcode = OP_X86_TEST_NULL;
1389                         break;
1390                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
1391                         /* 
1392                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1393                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1394                          * -->
1395                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1396                          * OP_COMPARE_IMM reg, imm
1397                          *
1398                          * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
1399                          */
1400                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1401                             ins->inst_basereg == last_ins->inst_destbasereg &&
1402                             ins->inst_offset == last_ins->inst_offset) {
1403                                         ins->opcode = OP_ICOMPARE_IMM;
1404                                         ins->sreg1 = last_ins->sreg1;
1405
1406                                         /* check if we can remove cmp reg,0 with test null */
1407                                         if (!ins->inst_imm)
1408                                                 ins->opcode = OP_X86_TEST_NULL;
1409                                 }
1410
1411                         break;
1412                 case OP_LOAD_MEMBASE:
1413                 case OP_LOADI4_MEMBASE:
1414                         /* 
1415                          * Note: if reg1 = reg2 the load op is removed
1416                          *
1417                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1418                          * OP_LOAD_MEMBASE offset(basereg), reg2
1419                          * -->
1420                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1421                          * OP_MOVE reg1, reg2
1422                          */
1423                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1424                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1425                             ins->inst_basereg == last_ins->inst_destbasereg &&
1426                             ins->inst_offset == last_ins->inst_offset) {
1427                                 if (ins->dreg == last_ins->sreg1) {
1428                                         last_ins->next = ins->next;                             
1429                                         ins = ins->next;                                
1430                                         continue;
1431                                 } else {
1432                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1433                                         ins->opcode = OP_MOVE;
1434                                         ins->sreg1 = last_ins->sreg1;
1435                                 }
1436
1437                         /* 
1438                          * Note: reg1 must be different from the basereg in the second load
1439                          * Note: if reg1 and reg2 are equal then the second load is removed
1440                          *
1441                          * OP_LOAD_MEMBASE offset(basereg), reg1
1442                          * OP_LOAD_MEMBASE offset(basereg), reg2
1443                          * -->
1444                          * OP_LOAD_MEMBASE offset(basereg), reg1
1445                          * OP_MOVE reg1, reg2
1446                          */
1447                         } else if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1448                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1449                               ins->inst_basereg != last_ins->dreg &&
1450                               ins->inst_basereg == last_ins->inst_basereg &&
1451                               ins->inst_offset == last_ins->inst_offset) {
1452
1453                                 if (ins->dreg == last_ins->dreg) {
1454                                         last_ins->next = ins->next;                             
1455                                         ins = ins->next;                                
1456                                         continue;
1457                                 } else {
1458                                         ins->opcode = OP_MOVE;
1459                                         ins->sreg1 = last_ins->dreg;
1460                                 }
1461
1462                                 //g_assert_not_reached ();
1463
1464 #if 0
1465                         /* 
1466                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1467                          * OP_LOAD_MEMBASE offset(basereg), reg
1468                          * -->
1469                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1470                          * OP_ICONST reg, imm
1471                          */
1472                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1473                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1474                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1475                                    ins->inst_offset == last_ins->inst_offset) {
1476                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1477                                 ins->opcode = OP_ICONST;
1478                                 ins->inst_c0 = last_ins->inst_imm;
1479                                 g_assert_not_reached (); // check this rule
1480 #endif
1481                         }
1482                         break;
1483                 case OP_LOADU1_MEMBASE:
1484                 case OP_LOADI1_MEMBASE:
1485                         /* 
1486                          * Note: if reg1 = reg2 the load op is removed
1487                          *
1488                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1489                          * OP_LOAD_MEMBASE offset(basereg), reg2
1490                          * -->
1491                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1492                          * OP_MOVE reg1, reg2
1493                          */
1494                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1495                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1496                                         ins->inst_offset == last_ins->inst_offset) {
1497                                 if (ins->dreg == last_ins->sreg1) {
1498                                         last_ins->next = ins->next;                             
1499                                         ins = ins->next;                                
1500                                         continue;
1501                                 } else {
1502                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1503                                         ins->opcode = OP_MOVE;
1504                                         ins->sreg1 = last_ins->sreg1;
1505                                 }
1506                         }
1507                         break;
1508                 case OP_LOADU2_MEMBASE:
1509                 case OP_LOADI2_MEMBASE:
1510                         /* 
1511                          * Note: if reg1 = reg2 the load op is removed
1512                          *
1513                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1514                          * OP_LOAD_MEMBASE offset(basereg), reg2
1515                          * -->
1516                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1517                          * OP_MOVE reg1, reg2
1518                          */
1519                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1520                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1521                                         ins->inst_offset == last_ins->inst_offset) {
1522                                 if (ins->dreg == last_ins->sreg1) {
1523                                         last_ins->next = ins->next;                             
1524                                         ins = ins->next;                                
1525                                         continue;
1526                                 } else {
1527                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1528                                         ins->opcode = OP_MOVE;
1529                                         ins->sreg1 = last_ins->sreg1;
1530                                 }
1531                         }
1532                         break;
1533                 case CEE_CONV_I4:
1534                 case CEE_CONV_U4:
1535                 case OP_MOVE:
1536                 case OP_SETREG:
1537                         /*
1538                          * Removes:
1539                          *
1540                          * OP_MOVE reg, reg 
1541                          */
1542                         if (ins->dreg == ins->sreg1) {
1543                                 if (last_ins)
1544                                         last_ins->next = ins->next;                             
1545                                 ins = ins->next;
1546                                 continue;
1547                         }
1548                         /* 
1549                          * Removes:
1550                          *
1551                          * OP_MOVE sreg, dreg 
1552                          * OP_MOVE dreg, sreg
1553                          */
1554                         if (last_ins && last_ins->opcode == OP_MOVE &&
1555                             ins->sreg1 == last_ins->dreg &&
1556                             ins->dreg == last_ins->sreg1) {
1557                                 last_ins->next = ins->next;                             
1558                                 ins = ins->next;                                
1559                                 continue;
1560                         }
1561                         break;
1562                 }
1563                 last_ins = ins;
1564                 ins = ins->next;
1565         }
1566         bb->last_ins = last_ins;
1567 }
1568
1569 static void
1570 insert_after_ins (MonoBasicBlock *bb, MonoInst *ins, MonoInst *to_insert)
1571 {
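        /* A NULL ins means insert at the head of the basic block */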
1572         if (ins == NULL) {
1573                 ins = bb->code;
1574                 bb->code = to_insert;
1575                 to_insert->next = ins;
1576         }
1577         else {
1578                 to_insert->next = ins->next;
1579                 ins->next = to_insert;
1580         }
1581 }
1582
1583 #define NEW_INS(cfg,dest,op) do {       \
1584                 (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
1585                 (dest)->opcode = (op);  \
1586         insert_after_ins (bb, last_ins, (dest)); \
1587         } while (0)
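/*
 * Usage sketch: NEW_INS allocates a zeroed MonoInst from the method's
 * mempool and links it in after last_ins, i.e. just before the instruction
 * being lowered, so the new instruction can materialize an immediate into
 * a fresh vreg which the lowered instruction then reads as a source.
 */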
1588
1589 /*
1590  * mono_arch_lowering_pass:
1591  *
1592  *  Converts complex opcodes into simpler ones so that each IR instruction
1593  * corresponds to one machine instruction.
1594  */
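/*
 * For example (a sketch of the rules below): an OP_COMPARE_IMM whose
 * immediate does not fit in 32 bits becomes
 *
 *         OP_I8CONST vreg <- imm
 *         OP_COMPARE sreg1, vreg
 *
 * since the cmp instruction can only encode a sign-extended imm32.
 */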
1595 static void
1596 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1597 {
1598         MonoInst *ins, *temp, *last_ins = NULL;
1599         ins = bb->code;
1600
1601         if (bb->max_ireg > cfg->rs->next_vireg)
1602                 cfg->rs->next_vireg = bb->max_ireg;
1603         if (bb->max_freg > cfg->rs->next_vfreg)
1604                 cfg->rs->next_vfreg = bb->max_freg;
1605
1606         /*
1607          * FIXME: Need to add more instructions, but the current machine 
1608          * description can't model some parts of the composite instructions like
1609          * cdq.
1610          */
1611         while (ins) {
1612                 switch (ins->opcode) {
1613                 case OP_DIV_IMM:
1614                 case OP_REM_IMM:
1615                 case OP_IDIV_IMM:
1616                 case OP_IREM_IMM:
1617                         NEW_INS (cfg, temp, OP_ICONST);
1618                         temp->inst_c0 = ins->inst_imm;
1619                         temp->dreg = mono_regstate_next_int (cfg->rs);
1620                         switch (ins->opcode) {
1621                         case OP_DIV_IMM:
1622                                 ins->opcode = OP_LDIV;
1623                                 break;
1624                         case OP_REM_IMM:
1625                                 ins->opcode = OP_LREM;
1626                                 break;
1627                         case OP_IDIV_IMM:
1628                                 ins->opcode = OP_IDIV;
1629                                 break;
1630                         case OP_IREM_IMM:
1631                                 ins->opcode = OP_IREM;
1632                                 break;
1633                         }
1634                         ins->sreg2 = temp->dreg;
1635                         break;
1636                 case OP_COMPARE_IMM:
1637                         if (!amd64_is_imm32 (ins->inst_imm)) {
1638                                 NEW_INS (cfg, temp, OP_I8CONST);
1639                                 temp->inst_c0 = ins->inst_imm;
1640                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1641                                 ins->opcode = OP_COMPARE;
1642                                 ins->sreg2 = temp->dreg;
1643                         }
1644                         break;
1645                 case OP_LOAD_MEMBASE:
1646                 case OP_LOADI8_MEMBASE:
1647                         if (!amd64_is_imm32 (ins->inst_offset)) {
1648                                 NEW_INS (cfg, temp, OP_I8CONST);
1649                                 temp->inst_c0 = ins->inst_offset;
1650                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1651                                 ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
1652                                 ins->inst_indexreg = temp->dreg;
1653                         }
1654                         break;
1655                 case OP_STORE_MEMBASE_IMM:
1656                 case OP_STOREI8_MEMBASE_IMM:
1657                         if (!amd64_is_imm32 (ins->inst_imm)) {
1658                                 NEW_INS (cfg, temp, OP_I8CONST);
1659                                 temp->inst_c0 = ins->inst_imm;
1660                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1661                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
1662                                 ins->sreg1 = temp->dreg;
1663                         }
1664                         break;
1665                 default:
1666                         break;
1667                 }
1668                 last_ins = ins;
1669                 ins = ins->next;
1670         }
1671         bb->last_ins = last_ins;
1672
1673         bb->max_ireg = cfg->rs->next_vireg;
1674         bb->max_freg = cfg->rs->next_vfreg;
1675 }
1676
1677 static const int 
1678 branch_cc_table [] = {
1679         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1680         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1681         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1682 };
1683
1684 static int
1685 opcode_to_x86_cond (int opcode)
1686 {
1687         switch (opcode) {
1688         case OP_IBEQ:
1689                 return X86_CC_EQ;
1690         case OP_IBNE_UN:
1691                 return X86_CC_NE;
1692         case OP_IBLT:
1693                 return X86_CC_LT;
1694         case OP_IBLT_UN:
1695                 return X86_CC_LT;
1696         case OP_IBGT:
1697                 return X86_CC_GT;
1698         case OP_IBGT_UN:
1699                 return X86_CC_GT;
1700         case OP_IBGE:
1701                 return X86_CC_GE;
1702         case OP_IBGE_UN:
1703                 return X86_CC_GE;
1704         case OP_IBLE:
1705                 return X86_CC_LE;
1706         case OP_IBLE_UN:
1707                 return X86_CC_LE;
1708         case OP_COND_EXC_IOV:
1709                 return X86_CC_O;
1710         case OP_COND_EXC_IC:
1711                 return X86_CC_C;
1712         default:
1713                 g_assert_not_reached ();
1714         }
1715
1716         return -1;
1717 }
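/*
 * Note that the unsigned branch opcodes map to the same logical condition
 * as the signed ones here; signedness is supplied separately as the second
 * argument of EMIT_COND_BRANCH/EMIT_COND_SYSTEM_EXCEPTION further down.
 */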
1718
1719 /*#include "cprop.c"*/
1720
1721 /*
1722  * Local register allocation.
1723  * We first scan the list of instructions and we save the liveness info of
1724  * each register (when the register is first used, when its value is set etc.).
1725  * We also reverse the list of instructions (in the InstList list) because assigning
1726  * registers backwards allows for more tricks to be used.
1727  */
1728 void
1729 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1730 {
1731         if (!bb->code)
1732                 return;
1733
1734         mono_arch_lowering_pass (cfg, bb);
1735
1736         mono_local_regalloc (cfg, bb);
1737 }
1738
1739 static unsigned char*
1740 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
1741 {
1742         if (use_sse2) {
1743                 amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
1744         }
1745         else {
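                /*
                 * x87 path, roughly: save the FPU control word, set the
                 * rounding-control bits (0xc00) to round-toward-zero, store
                 * the value with fistp, then restore the original control
                 * word, which matches C truncation semantics.
                 */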
1746                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
1747                 x86_fnstcw_membase(code, AMD64_RSP, 0);
1748                 amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2);
1749                 amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1750                 amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2);
1751                 amd64_fldcw_membase (code, AMD64_RSP, 2);
1752                 amd64_push_reg (code, AMD64_RAX); // SP = SP - 8
1753                 amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8);
1754                 amd64_pop_reg (code, dreg);
1755                 amd64_fldcw_membase (code, AMD64_RSP, 0);
1756                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
1757         }
1758
1759         if (size == 1)
1760                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
1761         else if (size == 2)
1762                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
1763         return code;
1764 }
1765
1766 static unsigned char*
1767 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1768 {
1769         int sreg = tree->sreg1;
1770         int need_touch = FALSE;
1771
1772 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1773         if (!(tree->flags & MONO_INST_INIT))
1774                 need_touch = TRUE;
1775 #endif
1776
1777         if (need_touch) {
1778                 guint8* br[5];
1779
1780                 /*
1781                  * Under Windows:
1782                  * If the requested stack size is larger than one page,
1783                  * perform a stack-touch operation.
1784                  */
1785                 /*
1786                  * Generate stack probe code.
1787                  * Under Windows, it is necessary to allocate one page at a time,
1788                  * "touching" the stack after each successful sub-allocation. This is
1789                  * because of the way stack growth is implemented - there is a
1790                  * guard page before the lowest stack page that is currently committed.
1791                  * The stack normally grows sequentially, so the OS traps access to the
1792                  * guard page and commits more pages when needed.
1793                  */
1794                 amd64_test_reg_imm (code, sreg, ~0xFFF);
1795                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1796
1797                 br[2] = code; /* loop */
1798                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
1799                 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
1800                 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1801                 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1802                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1803                 amd64_patch (br[3], br[2]);
1804                 amd64_test_reg_reg (code, sreg, sreg);
1805                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1806                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
1807
1808                 br[1] = code; x86_jump8 (code, 0);
1809
1810                 amd64_patch (br[0], code);
1811                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
1812                 amd64_patch (br[1], code);
1813                 amd64_patch (br[4], code);
1814         }
1815         else
1816                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
1817
1818         if (tree->flags & MONO_INST_INIT) {
1819                 int offset = 0;
1820                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
1821                         amd64_push_reg (code, AMD64_RAX);
1822                         offset += 8;
1823                 }
1824                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
1825                         amd64_push_reg (code, AMD64_RCX);
1826                         offset += 8;
1827                 }
1828                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
1829                         amd64_push_reg (code, AMD64_RDI);
1830                         offset += 8;
1831                 }
1832                 
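                /*
                 * Zero the new area with rep stos: RCX gets the iteration
                 * count derived from sreg, RAX the zero pattern and RDI the
                 * base of the area, just above the registers saved above.
                 */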
1833                 amd64_shift_reg_imm (code, X86_SHR, sreg, 4);
1834                 if (sreg != AMD64_RCX)
1835                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
1836                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
1837                                 
1838                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
1839                 amd64_cld (code);
1840                 amd64_prefix (code, X86_REP_PREFIX);
1841                 amd64_stosl (code);
1842                 
1843                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
1844                         amd64_pop_reg (code, AMD64_RDI);
1845                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
1846                         amd64_pop_reg (code, AMD64_RCX);
1847                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
1848                         amd64_pop_reg (code, AMD64_RAX);
1849         }
1850         return code;
1851 }
1852
1853 static guint8*
1854 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1855 {
1856         CallInfo *cinfo;
1857         guint32 quad;
1858
1859         /* Move return value to the target register */
1860         /* FIXME: do this in the local reg allocator */
1861         switch (ins->opcode) {
1862         case CEE_CALL:
1863         case OP_CALL_REG:
1864         case OP_CALL_MEMBASE:
1865         case OP_LCALL:
1866         case OP_LCALL_REG:
1867         case OP_LCALL_MEMBASE:
1868                 g_assert (ins->dreg == AMD64_RAX);
1869                 break;
1870         case OP_FCALL:
1871         case OP_FCALL_REG:
1872         case OP_FCALL_MEMBASE:
1873                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
1874                         if (use_sse2)
1875                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
1876                         else {
1877                                 /* FIXME: optimize this */
1878                                 amd64_movss_membase_reg (code, AMD64_RSP, -8, AMD64_XMM0);
1879                                 amd64_fld_membase (code, AMD64_RSP, -8, FALSE);
1880                         }
1881                 }
1882                 else {
1883                         if (use_sse2) {
1884                                 if (ins->dreg != AMD64_XMM0)
1885                                         amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
1886                         }
1887                         else {
1888                                 /* FIXME: optimize this */
1889                                 amd64_movsd_membase_reg (code, AMD64_RSP, -8, AMD64_XMM0);
1890                                 amd64_fld_membase (code, AMD64_RSP, -8, TRUE);
1891                         }
1892                 }
1893                 break;
1894         case OP_VCALL:
1895         case OP_VCALL_REG:
1896         case OP_VCALL_MEMBASE:
1897                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1898                 if (cinfo->ret.storage == ArgValuetypeInReg) {
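                        /*
                         * Sketch of the convention handled here: the value type
                         * is returned in registers, so the destination address
                         * is recovered from the stack into RCX and each returned
                         * quad is stored from the register recorded in
                         * pair_storage/pair_regs.
                         */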
1899                         /* Pop the destination address from the stack */
1900                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
1901                         amd64_pop_reg (code, AMD64_RCX);
1902                         
1903                         for (quad = 0; quad < 2; quad ++) {
1904                                 switch (cinfo->ret.pair_storage [quad]) {
1905                                 case ArgInIReg:
1906                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
1907                                         break;
1908                                 case ArgInFloatSSEReg:
1909                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
1910                                         break;
1911                                 case ArgInDoubleSSEReg:
1912                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
1913                                         break;
1914                                 case ArgNone:
1915                                         break;
1916                                 default:
1917                                         NOT_IMPLEMENTED;
1918                                 }
1919                         }
1920                 }
1921                 g_free (cinfo);
1922                 break;
1923         }
1924
1925         return code;
1926 }
1927
1928 /*
1929  * emit_load_volatile_arguments:
1930  *
1931  *  Load volatile arguments from the stack to the original input registers.
1932  * Required before a tail call.
1933  */
1934 static guint8*
1935 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1936 {
1937         MonoMethod *method = cfg->method;
1938         MonoMethodSignature *sig;
1939         MonoInst *inst;
1940         CallInfo *cinfo;
1941         guint32 i;
1942
1943         /* FIXME: Generate intermediate code instead */
1944
1945         sig = mono_method_signature (method);
1946
1947         cinfo = get_call_info (sig, FALSE);
1948         
1949         /* This is the opposite of the code in emit_prolog */
1950
1951         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1952                 ArgInfo *ainfo = cinfo->args + i;
1953                 MonoType *arg_type;
1954                 inst = cfg->varinfo [i];
1955
1956                 if (sig->hasthis && (i == 0))
1957                         arg_type = &mono_defaults.object_class->byval_arg;
1958                 else
1959                         arg_type = sig->params [i - sig->hasthis];
1960
1961                 if (inst->opcode != OP_REGVAR) {
1962                         switch (ainfo->storage) {
1963                         case ArgInIReg: {
1964                                 guint32 size = 8;
1965
1966                                 /* FIXME: I1 etc */
1967                                 amd64_mov_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset, size);
1968                                 break;
1969                         }
1970                         case ArgInFloatSSEReg:
1971                                 amd64_movss_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
1972                                 break;
1973                         case ArgInDoubleSSEReg:
1974                                 amd64_movsd_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
1975                                 break;
1976                         default:
1977                                 break;
1978                         }
1979                 }
1980         }
1981
1982         g_free (cinfo);
1983
1984         return code;
1985 }
1986
1987 #define REAL_PRINT_REG(text,reg) \
1988 mono_assert (reg >= 0); \
1989 amd64_push_reg (code, AMD64_RAX); \
1990 amd64_push_reg (code, AMD64_RDX); \
1991 amd64_push_reg (code, AMD64_RCX); \
1992 amd64_push_reg (code, reg); \
1993 amd64_push_imm (code, reg); \
1994 amd64_push_imm (code, text " %d %p\n"); \
1995 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
1996 amd64_call_reg (code, AMD64_RAX); \
1997 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); /* pop the three 8 byte pushes */ \
1998 amd64_pop_reg (code, AMD64_RCX); \
1999 amd64_pop_reg (code, AMD64_RDX); \
2000 amd64_pop_reg (code, AMD64_RAX);
2001
2002 /* benchmark and set based on cpu */
2003 #define LOOP_ALIGNMENT 8
2004 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
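/*
 * e.g. with LOOP_ALIGNMENT 8, a loop header that would start at an offset
 * ending in 0x5 gets 3 bytes of padding so it begins on an 8 byte boundary.
 */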
2005
2006 void
2007 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2008 {
2009         MonoInst *ins;
2010         MonoCallInst *call;
2011         guint offset;
2012         guint8 *code = cfg->native_code + cfg->code_len;
2013         MonoInst *last_ins = NULL;
2014         guint last_offset = 0;
2015         int max_len, cpos;
2016
2017         if (cfg->opt & MONO_OPT_PEEPHOLE)
2018                 peephole_pass (cfg, bb);
2019
2020         if (cfg->opt & MONO_OPT_LOOP) {
2021                 int pad, align = LOOP_ALIGNMENT;
2022                 /* set alignment depending on cpu */
2023                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2024                         pad = align - pad;
2025                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2026                         amd64_padding (code, pad);
2027                         cfg->code_len += pad;
2028                         bb->native_offset = cfg->code_len;
2029                 }
2030         }
2031
2032         if (cfg->verbose_level > 2)
2033                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2034
2035         cpos = bb->max_offset;
2036
2037         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2038                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2039                 g_assert (!cfg->compile_aot);
2040                 cpos += 6;
2041
2042                 cov->data [bb->dfn].cil_code = bb->cil_code;
2043                 /* this is not thread safe, but good enough */
2044                 amd64_inc_mem (code, (guint64)&cov->data [bb->dfn].count); 
2045         }
2046
2047         offset = code - cfg->native_code;
2048
2049         ins = bb->code;
2050         while (ins) {
2051                 offset = code - cfg->native_code;
2052
2053                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2054
2055                 if (offset > (cfg->code_size - max_len - 16)) {
2056                         cfg->code_size *= 2;
2057                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2058                         code = cfg->native_code + offset;
2059                         mono_jit_stats.code_reallocs++;
2060                 }
2061
2062                 mono_debug_record_line_number (cfg, ins, offset);
2063
2064                 switch (ins->opcode) {
2065                 case OP_BIGMUL:
2066                         amd64_mul_reg (code, ins->sreg2, TRUE);
2067                         break;
2068                 case OP_BIGMUL_UN:
2069                         amd64_mul_reg (code, ins->sreg2, FALSE);
2070                         break;
2071                 case OP_X86_SETEQ_MEMBASE:
2072                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2073                         break;
2074                 case OP_STOREI1_MEMBASE_IMM:
2075                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2076                         break;
2077                 case OP_STOREI2_MEMBASE_IMM:
2078                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2079                         break;
2080                 case OP_STOREI4_MEMBASE_IMM:
2081                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2082                         break;
2083                 case OP_STOREI1_MEMBASE_REG:
2084                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2085                         break;
2086                 case OP_STOREI2_MEMBASE_REG:
2087                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2088                         break;
2089                 case OP_STORE_MEMBASE_REG:
2090                 case OP_STOREI8_MEMBASE_REG:
2091                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2092                         break;
2093                 case OP_STOREI4_MEMBASE_REG:
2094                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2095                         break;
2096                 case OP_STORE_MEMBASE_IMM:
2097                 case OP_STOREI8_MEMBASE_IMM:
2098                         g_assert (amd64_is_imm32 (ins->inst_imm));
2099                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2100                         break;
2101                 case CEE_LDIND_I:
2102                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, sizeof (gpointer));
2103                         break;
2104                 case CEE_LDIND_I4:
2105                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2106                         break;
2107                 case CEE_LDIND_U4:
2108                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2109                         break;
2110                 case OP_LOADU4_MEM:
2111                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2112                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2113                         break;
2114                 case OP_LOAD_MEMBASE:
2115                 case OP_LOADI8_MEMBASE:
2116                         g_assert (amd64_is_imm32 (ins->inst_offset));
2117                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2118                         break;
2119                 case OP_LOADI4_MEMBASE:
2120                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2121                         break;
2122                 case OP_LOADU4_MEMBASE:
2123                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2124                         break;
2125                 case OP_LOADU1_MEMBASE:
2126                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2127                         break;
2128                 case OP_LOADI1_MEMBASE:
2129                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2130                         break;
2131                 case OP_LOADU2_MEMBASE:
2132                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2133                         break;
2134                 case OP_LOADI2_MEMBASE:
2135                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2136                         break;
2137                 case OP_AMD64_LOADI8_MEMINDEX:
2138                         amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
2139                         break;
2140                 case CEE_CONV_I1:
2141                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2142                         break;
2143                 case CEE_CONV_I2:
2144                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2145                         break;
2146                 case CEE_CONV_U1:
2147                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2148                         break;
2149                 case CEE_CONV_U2:
2150                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2151                         break;
2152                 case CEE_CONV_U8:
2153                 case CEE_CONV_U:
2154                         /* Clean out the upper 32 bits */
2155                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2156                         break;
2157                 case CEE_CONV_I8:
2158                 case CEE_CONV_I:
2159                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2160                         break;                  
2161                 case OP_COMPARE:
2162                 case OP_LCOMPARE:
2163                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2164                         break;
2165                 case OP_COMPARE_IMM:
2166                         g_assert (amd64_is_imm32 (ins->inst_imm));
2167                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2168                         break;
2169                 case OP_X86_COMPARE_REG_MEMBASE:
2170                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2171                         break;
2172                 case OP_X86_TEST_NULL:
2173                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
2174                         break;
2175                 case OP_AMD64_TEST_NULL:
2176                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
2177                         break;
2178                 case OP_X86_ADD_MEMBASE_IMM:
2179                         /* FIXME: Make a 64 version too */
2180                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2181                         break;
2182                 case OP_X86_ADD_MEMBASE:
2183                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2184                         break;
2185                 case OP_X86_SUB_MEMBASE_IMM:
2186                         g_assert (amd64_is_imm32 (ins->inst_imm));
2187                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2188                         break;
2189                 case OP_X86_SUB_MEMBASE:
2190                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2191                         break;
2192                 case OP_X86_INC_MEMBASE:
2193                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
2194                         break;
2195                 case OP_X86_INC_REG:
2196                         amd64_inc_reg_size (code, ins->dreg, 4);
2197                         break;
2198                 case OP_X86_DEC_MEMBASE:
2199                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
2200                         break;
2201                 case OP_X86_DEC_REG:
2202                         amd64_dec_reg_size (code, ins->dreg, 4);
2203                         break;
2204                 case OP_X86_MUL_MEMBASE:
2205                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2206                         break;
2207                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
2208                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2209                         break;
2210                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
2211                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2212                         break;
2213                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
2214                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2215                         break;
2216                 case CEE_BREAK:
2217                         amd64_breakpoint (code);
2218                         break;
2219                 case OP_ADDCC:
2220                 case CEE_ADD:
2221                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2222                         break;
2223                 case OP_ADC:
2224                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2225                         break;
2226                 case OP_ADD_IMM:
2227                         g_assert (amd64_is_imm32 (ins->inst_imm));
2228                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2229                         break;
2230                 case OP_ADC_IMM:
2231                         g_assert (amd64_is_imm32 (ins->inst_imm));
2232                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2233                         break;
2234                 case OP_SUBCC:
2235                 case CEE_SUB:
2236                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2237                         break;
2238                 case OP_SBB:
2239                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2240                         break;
2241                 case OP_SUB_IMM:
2242                         g_assert (amd64_is_imm32 (ins->inst_imm));
2243                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2244                         break;
2245                 case OP_SBB_IMM:
2246                         g_assert (amd64_is_imm32 (ins->inst_imm));
2247                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2248                         break;
2249                 case CEE_AND:
2250                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2251                         break;
2252                 case OP_AND_IMM:
2253                         g_assert (amd64_is_imm32 (ins->inst_imm));
2254                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2255                         break;
2256                 case CEE_MUL:
2257                 case OP_LMUL:
2258                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2259                         break;
2260                 case OP_MUL_IMM:
2261                 case OP_LMUL_IMM:
2262                         amd64_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2263                         break;
2264                 case CEE_DIV:
2265                 case OP_LDIV:
2266                         amd64_cdq (code);
2267                         amd64_div_reg (code, ins->sreg2, TRUE);
2268                         break;
2269                 case CEE_DIV_UN:
2270                 case OP_LDIV_UN:
2271                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2272                         amd64_div_reg (code, ins->sreg2, FALSE);
2273                         break;
2274                 case CEE_REM:
2275                 case OP_LREM:
2276                         amd64_cdq (code);
2277                         amd64_div_reg (code, ins->sreg2, TRUE);
2278                         break;
2279                 case CEE_REM_UN:
2280                 case OP_LREM_UN:
2281                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2282                         amd64_div_reg (code, ins->sreg2, FALSE);
2283                         break;
2284                 case OP_LMUL_OVF:
2285                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2286                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2287                         break;
2288                 case CEE_OR:
2289                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2290                         break;
2291                 case OP_OR_IMM:
2292                         g_assert (amd64_is_imm32 (ins->inst_imm));
2293                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2294                         break;
2295                 case CEE_XOR:
2296                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2297                         break;
2298                 case OP_XOR_IMM:
2299                         g_assert (amd64_is_imm32 (ins->inst_imm));
2300                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2301                         break;
2302                 case CEE_SHL:
2303                 case OP_LSHL:
2304                         g_assert (ins->sreg2 == AMD64_RCX);
2305                         amd64_shift_reg (code, X86_SHL, ins->dreg);
2306                         break;
2307                 case CEE_SHR:
2308                 case OP_LSHR:
2309                         g_assert (ins->sreg2 == AMD64_RCX);
2310                         amd64_shift_reg (code, X86_SAR, ins->dreg);
2311                         break;
2312                 case OP_SHR_IMM:
2313                         g_assert (amd64_is_imm32 (ins->inst_imm));
2314                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2315                         break;
2316                 case OP_LSHR_IMM:
2317                         g_assert (amd64_is_imm32 (ins->inst_imm));
2318                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2319                         break;
2320                 case OP_SHR_UN_IMM:
2321                         g_assert (amd64_is_imm32 (ins->inst_imm));
2322                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2323                         break;
2324                 case OP_LSHR_UN_IMM:
2325                         g_assert (amd64_is_imm32 (ins->inst_imm));
2326                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2327                         break;
2328                 case CEE_SHR_UN:
2329                         g_assert (ins->sreg2 == AMD64_RCX);
2330                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2331                         break;
2332                 case OP_LSHR_UN:
2333                         g_assert (ins->sreg2 == AMD64_RCX);
2334                         amd64_shift_reg (code, X86_SHR, ins->dreg);
2335                         break;
2336                 case OP_SHL_IMM:
2337                         g_assert (amd64_is_imm32 (ins->inst_imm));
2338                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2339                         break;
2340                 case OP_LSHL_IMM:
2341                         g_assert (amd64_is_imm32 (ins->inst_imm));
2342                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2343                         break;
2344
2345                 case OP_IADDCC:
2346                 case OP_IADD:
2347                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
2348                         break;
2349                 case OP_IADC:
2350                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
2351                         break;
2352                 case OP_IADD_IMM:
2353                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
2354                         break;
2355                 case OP_IADC_IMM:
2356                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
2357                         break;
2358                 case OP_ISUBCC:
2359                 case OP_ISUB:
2360                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
2361                         break;
2362                 case OP_ISBB:
2363                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
2364                         break;
2365                 case OP_ISUB_IMM:
2366                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
2367                         break;
2368                 case OP_ISBB_IMM:
2369                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
2370                         break;
2371                 case OP_IAND:
2372                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
2373                         break;
2374                 case OP_IAND_IMM:
2375                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
2376                         break;
2377                 case OP_IOR:
2378                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
2379                         break;
2380                 case OP_IOR_IMM:
2381                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
2382                         break;
2383                 case OP_IXOR:
2384                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
2385                         break;
2386                 case OP_IXOR_IMM:
2387                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
2388                         break;
2389                 case OP_INEG:
2390                         amd64_neg_reg_size (code, ins->sreg1, 4);
2391                         break;
2392                 case OP_INOT:
2393                         amd64_not_reg_size (code, ins->sreg1, 4);
2394                         break;
2395                 case OP_ISHL:
2396                         g_assert (ins->sreg2 == AMD64_RCX);
2397                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
2398                         break;
2399                 case OP_ISHR:
2400                         g_assert (ins->sreg2 == AMD64_RCX);
2401                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
2402                         break;
2403                 case OP_ISHR_IMM:
2404                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2405                         break;
2406                 case OP_ISHR_UN_IMM:
2407                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2408                         break;
2409                 case OP_ISHR_UN:
2410                         g_assert (ins->sreg2 == AMD64_RCX);
2411                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2412                         break;
2413                 case OP_ISHL_IMM:
2414                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2415                         break;
2416                 case OP_IMUL:
2417                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2418                         break;
2419                 case OP_IMUL_IMM:
2420                         amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, 4);
2421                         break;
2422                 case OP_IMUL_OVF:
2423                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2424                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2425                         break;
2426                 case OP_IMUL_OVF_UN:
2427                 case OP_LMUL_OVF_UN: {
2428                         /* the mul operation and the exception check should most likely be split */
2429                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2430                         int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
2431                         /*g_assert (ins->sreg2 == X86_EAX);
2432                         g_assert (ins->dreg == X86_EAX);*/
2433                         if (ins->sreg2 == X86_EAX) {
2434                                 non_eax_reg = ins->sreg1;
2435                         } else if (ins->sreg1 == X86_EAX) {
2436                                 non_eax_reg = ins->sreg2;
2437                         } else {
2438                                 /* no need to save since we're going to store to it anyway */
2439                                 if (ins->dreg != X86_EAX) {
2440                                         saved_eax = TRUE;
2441                                         amd64_push_reg (code, X86_EAX);
2442                                 }
2443                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
2444                                 non_eax_reg = ins->sreg2;
2445                         }
2446                         if (ins->dreg == X86_EDX) {
2447                                 if (!saved_eax) {
2448                                         saved_eax = TRUE;
2449                                         amd64_push_reg (code, X86_EAX);
2450                                 }
2451                         } else {
2452                                 saved_edx = TRUE;
2453                                 amd64_push_reg (code, X86_EDX);
2454                         }
2455                         amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
2456                         /* save before the check since pop and mov don't change the flags */
2457                         if (ins->dreg != X86_EAX)
2458                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
2459                         if (saved_edx)
2460                                 amd64_pop_reg (code, X86_EDX);
2461                         if (saved_eax)
2462                                 amd64_pop_reg (code, X86_EAX);
2463                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2464                         break;
2465                 }
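                /*
                 * A rough sketch (not a literal listing) of what the overflow-checked
                 * unsigned multiply above emits when neither source is already in %rax:
                 *
                 *   push %rax            ; only if %rax must be preserved
                 *   push %rdx            ; mul clobbers %rdx with the high half
                 *   mov  sreg1, %rax
                 *   mul  sreg2           ; unsigned: OF/CF set if the high half != 0
                 *   mov  %rax, dreg
                 *   pop  %rdx
                 *   pop  %rax            ; pop/mov leave the flags intact
                 *   jo   <throw OverflowException>
                 */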
2466                 case OP_IDIV:
2467                         amd64_cdq_size (code, 4);
2468                         amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
2469                         break;
2470                 case OP_IDIV_UN:
2471                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2472                         amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
2473                         break;
2474                 case OP_IREM:
2475                         amd64_cdq_size (code, 4);
2476                         amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
2477                         break;
2478                 case OP_IREM_UN:
2479                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2480                         amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
2481                         break;
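                /*
                 * div/idiv implicitly divide %rdx:%rax by the operand, leaving the
                 * quotient in %rax and the remainder in %rdx: the signed cases above
                 * sign-extend %eax into %edx with cdq first, while the unsigned cases
                 * simply zero %rdx before the divide.
                 */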
2482                 case OP_ICOMPARE:
2483                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
2484                         break;
2485                 case OP_ICOMPARE_IMM:
2486                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
2487                         break;
2488                 case OP_IBEQ:
2489                 case OP_IBLT:
2490                 case OP_IBGT:
2491                 case OP_IBGE:
2492                 case OP_IBLE:
2493                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), TRUE);
2494                         break;
2495                 case OP_IBNE_UN:
2496                 case OP_IBLT_UN:
2497                 case OP_IBGT_UN:
2498                 case OP_IBGE_UN:
2499                 case OP_IBLE_UN:
2500                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), FALSE);
2501                         break;
2502                 case OP_COND_EXC_IOV:
2503                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
2504                                                                                 TRUE, ins->inst_p1);
2505                         break;
2506                 case OP_COND_EXC_IC:
2507                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
2508                                                                                 FALSE, ins->inst_p1);
2509                         break;
2510                 case CEE_NOT:
2511                         amd64_not_reg (code, ins->sreg1);
2512                         break;
2513                 case CEE_NEG:
2514                         amd64_neg_reg (code, ins->sreg1);
2515                         break;
2516                 case OP_SEXT_I1:
2517                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2518                         break;
2519                 case OP_SEXT_I2:
2520                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2521                         break;
2522                 case OP_SEXT_I4:
2523                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2524                         break;
2525                 case OP_ICONST:
2526                 case OP_I8CONST:
2527                         if ((((guint64)ins->inst_c0) >> 32) == 0)
2528                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
2529                         else
2530                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
2531                         break;
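                /*
                 * Writing a 32-bit register zero-extends into the full 64-bit
                 * register, so a constant whose upper 32 bits are clear can use the
                 * short "mov $imm32, %r32" encoding instead of the 10-byte
                 * "movabs $imm64, %r64" form.
                 */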
2532                 case OP_AOTCONST:
2533                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2534                         amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8);
2535                         break;
2536                 case CEE_CONV_I4:
2537                 case CEE_CONV_U4:
2538                 case OP_MOVE:
2539                 case OP_SETREG:
2540                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
2541                         break;
2542                 case OP_AMD64_SET_XMMREG_R4: {
2543                         if (use_sse2) {
2544                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
2545                         }
2546                         else {
2547                                 amd64_fst_membase (code, AMD64_RSP, -8, FALSE, TRUE);
2548                                 /* ins->dreg is set to -1 by the reg allocator */
2549                                 amd64_movss_reg_membase (code, ins->unused, AMD64_RSP, -8);
2550                         }
2551                         break;
2552                 }
2553                 case OP_AMD64_SET_XMMREG_R8: {
2554                         if (use_sse2) {
2555                                 if (ins->dreg != ins->sreg1)
2556                                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
2557                         }
2558                         else {
2559                                 amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE);
2560                                 /* ins->dreg is set to -1 by the reg allocator */
2561                                 amd64_movsd_reg_membase (code, ins->unused, AMD64_RSP, -8);
2562                         }
2563                         break;
2564                 }
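                /*
                 * Both x87-to-SSE moves above bounce the value through memory just
                 * below %rsp without adjusting it; this presumably relies on the
                 * 128-byte red zone that the System V AMD64 ABI guarantees below
                 * the stack pointer.
                 */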
2565                 case CEE_JMP: {
2566                         /*
2567                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2568                          * Keep in sync with the code in emit_epilog.
2569                          */
2570                         int pos = 0, i;
2571
2572                         /* FIXME: no tracing support... */
2573                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2574                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2575
2576                         g_assert (!cfg->method->save_lmf);
2577
2578                         code = emit_load_volatile_arguments (cfg, code);
2579
2580                         for (i = 0; i < AMD64_NREG; ++i)
2581                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
2582                                         pos -= sizeof (gpointer);
2583                         
2584                         if (pos)
2585                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
2586
2587                         /* Pop registers in reverse order */
2588                         for (i = AMD64_NREG - 1; i > 0; --i)
2589                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
2590                                         amd64_pop_reg (code, i);
2591                                 }
2592
2593                         amd64_leave (code);
2594                         offset = code - cfg->native_code;
2595                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2596                         if (cfg->compile_aot)
2597                                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
2598                         else
2599                                 amd64_set_reg_template (code, AMD64_R11);
2600                         amd64_jump_reg (code, AMD64_R11);
2601                         break;
2602                 }
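                /*
                 * Roughly, the tail-call sequence built above is:
                 *
                 *   lea  -N(%rbp), %rsp      ; point %rsp at the callee-saved area
                 *   pop  <callee-saved regs> ; in reverse order of the prologue
                 *   leave
                 *   mov  $<target>, %r11     ; patched via MONO_PATCH_INFO_METHOD_JUMP
                 *   jmp  *%r11
                 */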
2603                 case OP_CHECK_THIS:
2604                         /* ensure ins->sreg1 is not NULL */
2605                         amd64_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
2606                         break;
2607                 case OP_ARGLIST: {
2608                         amd64_lea_membase (code, AMD64_R11, AMD64_RBP, cfg->sig_cookie);
2609                         amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8);
2610                         break;
2611                 }
2612                 case OP_FCALL:
2613                 case OP_LCALL:
2614                 case OP_VCALL:
2615                 case OP_VOIDCALL:
2616                 case CEE_CALL:
2617                         call = (MonoCallInst*)ins;
2617                         /*
2618                          * The AMD64 ABI requires callers of vararg functions to pass the number of SSE registers used for arguments in %al.
2619                          */
2621                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
2622                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2623                         else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
2624                                 /* 
2625                                  * Since the unmanaged calling convention doesn't contain a 
2626                                  * 'vararg' entry, we have to treat every pinvoke call as a
2627                                  * potential vararg call.
2628                                  */
2629                                 guint32 nregs, i;
2630                                 nregs = 0;
2631                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
2632                                         if (call->used_fregs & (1 << i))
2633                                                 nregs ++;
2634                                 if (!nregs)
2635                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2636                                 else
2637                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
2638                         }
2639
2640                         if (ins->flags & MONO_INST_HAS_METHOD)
2641                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2642                         else
2643                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2644                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2645                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
2646                         code = emit_move_return_value (cfg, ins, code);
2647                         break;
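                /*
                 * In the vararg/pinvoke paths above, %al ends up holding (an upper
                 * bound on) the number of SSE registers used for the arguments, so a
                 * call passing one float might be preceded by something like
                 * "mov $1, %eax" and a call passing none by "xor %eax, %eax".
                 */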
2648                 case OP_FCALL_REG:
2649                 case OP_LCALL_REG:
2650                 case OP_VCALL_REG:
2651                 case OP_VOIDCALL_REG:
2652                 case OP_CALL_REG:
2653                         call = (MonoCallInst*)ins;
2654
2655                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
2656                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
2657                                 ins->sreg1 = AMD64_R11;
2658                         }
2659
2660                         /*
2661                          * The AMD64 ABI requires callers of vararg functions to pass the number of SSE registers used for arguments in %al.
2662                          */
2663                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
2664                                 if (ins->sreg1 == AMD64_RAX) {
2665                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
2666                                         ins->sreg1 = AMD64_R11;
2667                                 }
2668                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2669                         }
2670                         amd64_call_reg (code, ins->sreg1);
2671                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2672                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
2673                         code = emit_move_return_value (cfg, ins, code);
2674                         break;
2675                 case OP_FCALL_MEMBASE:
2676                 case OP_LCALL_MEMBASE:
2677                 case OP_VCALL_MEMBASE:
2678                 case OP_VOIDCALL_MEMBASE:
2679                 case OP_CALL_MEMBASE:
2680                         call = (MonoCallInst*)ins;
2681
2682                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
2683                                 /* 
2684                                  * Can't use R11 because it is clobbered by the trampoline 
2685                                  * code, and the reg value is needed by get_vcall_slot_addr.
2686                                  */
2687                                 amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
2688                                 ins->sreg1 = AMD64_RAX;
2689                         }
2690
2691                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
2692                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2693                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
2694                         code = emit_move_return_value (cfg, ins, code);
2695                         break;
2696                 case OP_OUTARG:
2697                 case OP_X86_PUSH:
2698                         amd64_push_reg (code, ins->sreg1);
2699                         break;
2700                 case OP_X86_PUSH_IMM:
2701                         g_assert (amd64_is_imm32 (ins->inst_imm));
2702                         amd64_push_imm (code, ins->inst_imm);
2703                         break;
2704                 case OP_X86_PUSH_MEMBASE:
2705                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
2706                         break;
2707                 case OP_X86_PUSH_OBJ: 
2708                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
2709                         amd64_push_reg (code, AMD64_RDI);
2710                         amd64_push_reg (code, AMD64_RSI);
2711                         amd64_push_reg (code, AMD64_RCX);
2712                         if (ins->inst_offset)
2713                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
2714                         else
2715                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
2716                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
2717                         amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
2718                         amd64_cld (code);
2719                         amd64_prefix (code, X86_REP_PREFIX);
2720                         amd64_movsd (code);
2721                         amd64_pop_reg (code, AMD64_RCX);
2722                         amd64_pop_reg (code, AMD64_RSI);
2723                         amd64_pop_reg (code, AMD64_RDI);
2724                         break;
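                /*
                 * The sequence above pushes a value type by copying it with the
                 * string-move machinery, roughly:
                 *
                 *   sub  $size, %rsp         ; reserve the argument slot
                 *   push %rdi; push %rsi; push %rcx
                 *   lea  <src>, %rsi
                 *   lea  3*8(%rsp), %rdi     ; skip the three saved registers
                 *   mov  $size/8, %rcx
                 *   cld; rep movs            ; copy quadword by quadword
                 *   pop  %rcx; pop %rsi; pop %rdi
                 */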
2725                 case OP_X86_LEA:
2726                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2727                         break;
2728                 case OP_X86_LEA_MEMBASE:
2729                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2730                         break;
2731                 case OP_X86_XCHG:
2732                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2733                         break;
2734                 case OP_LOCALLOC:
2735                         /* keep alignment */
2736                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2737                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2738                         code = mono_emit_stack_alloc (code, ins);
2739                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
2740                         break;
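                /*
                 * The two ALU ops above round the requested size up to the frame
                 * alignment with the usual mask trick, i.e. for a 16-byte alignment
                 * size = (size + 15) & ~15, so 1 -> 16, 16 -> 16, 17 -> 32.
                 */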
2741                 case CEE_RET:
2742                         amd64_ret (code);
2743                         break;
2744                 case CEE_THROW: {
2745                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
2746                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2747                                              (gpointer)"mono_arch_throw_exception");
2748                         break;
2749                 }
2750                 case OP_RETHROW: {
2751                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
2752                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2753                                              (gpointer)"mono_arch_rethrow_exception");
2754                         break;
2755                 }
2756                 case OP_CALL_HANDLER: 
2757                         /* Align stack */
2758                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
2759                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2760                         amd64_call_imm (code, 0);
2761                         /* Restore stack alignment */
2762                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2763                         break;
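                /*
                 * The call pushes an 8-byte return address, so the "sub $8, %rsp"
                 * beforehand keeps %rsp 16-byte aligned inside the handler and the
                 * matching "add $8, %rsp" undoes it afterwards.
                 */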
2764                 case OP_LABEL:
2765                         ins->inst_c0 = code - cfg->native_code;
2766                         break;
2767                 case CEE_BR:
2768                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2769                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2770                         //break;
2771                         if (ins->flags & MONO_INST_BRLABEL) {
2772                                 if (ins->inst_i0->inst_c0) {
2773                                         amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2774                                 } else {
2775                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2776                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2777                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2778                                                 x86_jump8 (code, 0);
2779                                         else 
2780                                                 x86_jump32 (code, 0);
2781                                 }
2782                         } else {
2783                                 if (ins->inst_target_bb->native_offset) {
2784                                         amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2785                                 } else {
2786                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2787                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2788                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2789                                                 x86_jump8 (code, 0);
2790                                         else 
2791                                                 x86_jump32 (code, 0);
2792                                 } 
2793                         }
2794                         break;
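                /*
                 * For branches whose target has not been emitted yet, a patch entry
                 * is recorded and a placeholder displacement is emitted; with
                 * MONO_OPT_BRANCH the 2-byte jump8 form is used when the estimated
                 * displacement fits in a signed byte, otherwise the 5-byte jump32.
                 */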
2795                 case OP_BR_REG:
2796                         amd64_jump_reg (code, ins->sreg1);
2797                         break;
2798                 case OP_CEQ:
2799                 case OP_ICEQ:
2800                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2801                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2802                         break;
2803                 case OP_CLT:
2804                 case OP_ICLT:
2805                         amd64_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2806                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2807                         break;
2808                 case OP_CLT_UN:
2809                 case OP_ICLT_UN:
2810                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2811                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2812                         break;
2813                 case OP_CGT:
2814                 case OP_ICGT:
2815                         amd64_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2816                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2817                         break;
2818                 case OP_CGT_UN:
2819                 case OP_ICGT_UN:
2820                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2821                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2822                         break;
2823                 case OP_COND_EXC_EQ:
2824                 case OP_COND_EXC_NE_UN:
2825                 case OP_COND_EXC_LT:
2826                 case OP_COND_EXC_LT_UN:
2827                 case OP_COND_EXC_GT:
2828                 case OP_COND_EXC_GT_UN:
2829                 case OP_COND_EXC_GE:
2830                 case OP_COND_EXC_GE_UN:
2831                 case OP_COND_EXC_LE:
2832                 case OP_COND_EXC_LE_UN:
2833                 case OP_COND_EXC_OV:
2834                 case OP_COND_EXC_NO:
2835                 case OP_COND_EXC_C:
2836                 case OP_COND_EXC_NC:
2837                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2838                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2839                         break;
2840                 case CEE_BEQ:
2841                 case CEE_BNE_UN:
2842                 case CEE_BLT:
2843                 case CEE_BLT_UN:
2844                 case CEE_BGT:
2845                 case CEE_BGT_UN:
2846                 case CEE_BGE:
2847                 case CEE_BGE_UN:
2848                 case CEE_BLE:
2849                 case CEE_BLE_UN:
2850                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2851                         break;
2852
2853                 /* floating point opcodes */
2854                 case OP_R8CONST: {
2855                         double d = *(double *)ins->inst_p0;
2856
2857                         if (use_sse2) {
2858                                 if ((d == 0.0) && (mono_signbit (d) == 0)) {
2859                                         amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
2860                                 }
2861                                 else {
2862                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2863                                         amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
2864                                 }
2865                         }
2866                         else if ((d == 0.0) && (mono_signbit (d) == 0)) {
2867                                 amd64_fldz (code);
2868                         } else if (d == 1.0) {
2869                                 x86_fld1 (code);
2870                         } else {
2871                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2872                                 amd64_fld_membase (code, AMD64_RIP, 0, TRUE);
2873                         }
2874                         break;
2875                 }
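                /*
                 * +0.0 is materialized with a self-xor (xorpd) or fldz instead of a
                 * load from a patched constant; the mono_signbit test keeps -0.0 out
                 * of this fast path, since the shortcut would lose the sign bit.
                 */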
2876                 case OP_R4CONST: {
2877                         float f = *(float *)ins->inst_p0;
2878
2879                         if (use_sse2) {
2880                                 if ((f == 0.0) && (mono_signbit (f) == 0)) {
2881                                         amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
2882                                 }
2883                                 else {
2884                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2885                                         amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
2886                                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
2887                                 }
2888                         }
2889                         else if ((f == 0.0) && (mono_signbit (f) == 0)) {
2890                                 amd64_fldz (code);
2891                         } else if (f == 1.0) {
2892                                 x86_fld1 (code);
2893                         } else {
2894                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2895                                 amd64_fld_membase (code, AMD64_RIP, 0, FALSE);
2896                         }
2897                         break;
2898                 }
2899                 case OP_STORER8_MEMBASE_REG:
2900                         if (use_sse2)
2901                                 amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
2902                         else
2903                                 amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2904                         break;
2905                 case OP_LOADR8_SPILL_MEMBASE:
2906                         if (use_sse2)
2907                                 g_assert_not_reached ();
2908                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2909                         amd64_fxch (code, 1);
2910                         break;
2911                 case OP_LOADR8_MEMBASE:
2912                         if (use_sse2)
2913                                 amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2914                         else
2915                                 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2916                         break;
2917                 case OP_STORER4_MEMBASE_REG:
2918                         if (use_sse2) {
2919                                 /* This requires a double->single conversion */
2920                                 amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1);
2921                                 amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15);
2922                         }
2923                         else
2924                                 amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2925                         break;
2926                 case OP_LOADR4_MEMBASE:
2927                         if (use_sse2) {
2928                                 amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2929                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
2930                         }
2931                         else
2932                                 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2933                         break;
2934                 case CEE_CONV_R4: /* FIXME: change precision */
2935                 case CEE_CONV_R8:
2936                         if (use_sse2)
2937                                 amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2938                         else {
2939                                 amd64_push_reg (code, ins->sreg1);
2940                                 amd64_fild_membase (code, AMD64_RSP, 0, FALSE);
2941                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2942                         }
2943                         break;
2944                 case CEE_CONV_R_UN:
2945                         /* Emulated */
2946                         g_assert_not_reached ();
2947                         break;
2948                 case OP_LCONV_TO_R4: /* FIXME: change precision */
2949                 case OP_LCONV_TO_R8:
2950                         if (use_sse2)
2951                                 amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
2952                         else {
2953                                 amd64_push_reg (code, ins->sreg1);
2954                                 amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
2955                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2956                         }
2957                         break;
2958                 case OP_X86_FP_LOAD_I8:
2959                         if (use_sse2)
2960                                 g_assert_not_reached ();
2961                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2962                         break;
2963                 case OP_X86_FP_LOAD_I4:
2964                         if (use_sse2)
2965                                 g_assert_not_reached ();
2966                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2967                         break;
2968                 case OP_FCONV_TO_I1:
2969                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
2970                         break;
2971                 case OP_FCONV_TO_U1:
2972                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
2973                         break;
2974                 case OP_FCONV_TO_I2:
2975                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
2976                         break;
2977                 case OP_FCONV_TO_U2:
2978                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
2979                         break;
2980                 case OP_FCONV_TO_I4:
2981                 case OP_FCONV_TO_I:
2982                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
2983                         break;
2984                 case OP_FCONV_TO_I8:
2985                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
2986                         break;
2987                 case OP_LCONV_TO_R_UN: { 
2988                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2989                         guint8 *br;
2990
2991                         if (use_sse2)
2992                                 g_assert_not_reached ();
2993
2994                         /* load the 64-bit integer onto the FP stack */
2995                         amd64_push_imm (code, 0);
2996                         amd64_push_reg (code, ins->sreg2);
2997                         amd64_push_reg (code, ins->sreg1);
2998                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
2999                         /* store as an 80-bit FP value */
3000                         x86_fst80_membase (code, AMD64_RSP, 0);
3001                         
3002                         /* test if lreg is negative */
3003                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3004                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3005         
3006                         /* add correction constant mn */
3007                         x86_fld80_mem (code, mn);
3008                         x86_fld80_membase (code, AMD64_RSP, 0);
3009                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3010                         x86_fst80_membase (code, AMD64_RSP, 0);
3011
3012                         amd64_patch (br, code);
3013
3014                         x86_fld80_membase (code, AMD64_RSP, 0);
3015                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 12);
3016
3017                         break;
3018                 }
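                /*
                 * The correction constant mn appears to be 2^64 encoded as an 80-bit
                 * extended double (significand 0x8000000000000000, biased exponent
                 * 0x403f): fild treats the source as signed, so when the top bit is
                 * set the loaded value is off by exactly 2^64 and the branch above
                 * adds it back.
                 */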
3019                 case OP_LCONV_TO_OVF_I: {
3020                         guint8 *br [3], *label [1];
3021
3022                         if (use_sse2)
3023                                 g_assert_not_reached ();
3024
3025                         /* 
3026                          * Valid ints: 0xffffffff:80000000 to 00000000:7fffffff (INT32_MIN..INT32_MAX as high:low words)
3027                          */
3028                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3029
3030                         /* If the low word top bit is set, see if we are negative */
3031                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3032                         /* We are not negative (no top bit set); check that our top word is zero */
3033                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3034                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3035                         label [0] = code;
3036
3037                         /* throw exception */
3038                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3039                         x86_jump32 (code, 0);
3040         
3041                         amd64_patch (br [0], code);
3042                         /* our top bit is set, check that the top word is 0xffffffff */
3043                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3044                 
3045                         amd64_patch (br [1], code);
3046                         /* nope, emit exception */
3047                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3048                         amd64_patch (br [2], label [0]);
3049
3050                         if (ins->dreg != ins->sreg1)
3051                                 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3052                         break;
3053                 }
3054                 case CEE_CONV_OVF_U4:
3055                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
3056                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
3057                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3058                         break;
3059                 case CEE_CONV_OVF_I4_UN:
3060                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
3061                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
3062                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3063                         break;
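                /*
                 * Both conversions above reduce to a single bounds check:
                 * CEE_CONV_OVF_U4 rejects negative inputs (signed compare against 0),
                 * while CEE_CONV_OVF_I4_UN rejects unsigned values above 0x7fffffff.
                 */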
3064                 case OP_FMOVE:
3065                         if (use_sse2 && (ins->dreg != ins->sreg1))
3066                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
3067                         break;
3068                 case OP_FADD:
3069                         if (use_sse2)
3070                                 amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
3071                         else
3072                                 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3073                         break;
3074                 case OP_FSUB:
3075                         if (use_sse2)
3076                                 amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
3077                         else
3078                                 amd64_fp_op_reg (code, X86_FSUB, 1, TRUE);
3079                         break;          
3080                 case OP_FMUL:
3081                         if (use_sse2)
3082                                 amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
3083                         else
3084                                 amd64_fp_op_reg (code, X86_FMUL, 1, TRUE);
3085                         break;          
3086                 case OP_FDIV:
3087                         if (use_sse2)
3088                                 amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
3089                         else
3090                                 amd64_fp_op_reg (code, X86_FDIV, 1, TRUE);
3091                         break;          
3092                 case OP_FNEG:
3093                         if (use_sse2) {
3094                                 amd64_mov_reg_imm_size (code, AMD64_R11, 0x8000000000000000, 8);
3095                                 amd64_push_reg (code, AMD64_R11);
3096                                 amd64_push_reg (code, AMD64_R11);
3097                                 amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RSP, 0);
3098                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
3099                         }
3100                         else
3101                                 amd64_fchs (code);
3102                         break;          
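                /*
                 * The SSE2 path above negates by flipping the sign bit: the 64-bit
                 * mask 0x8000000000000000 is pushed twice to build a 16-byte stack
                 * operand for xorpd, which then toggles the sign of the double in
                 * the destination register.
                 */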
3103                 case OP_SIN:
3104                         if (use_sse2) {
3105                                 EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
3106                         }
3107                         else {
3108                                 amd64_fsin (code);
3109                                 amd64_fldz (code);
3110                                 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3111                         }
3112                         break;          
3113                 case OP_COS:
3114                         if (use_sse2) {
3115                                 EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
3116                         }
3117                         else {
3118                                 amd64_fcos (code);
3119                                 amd64_fldz (code);
3120                                 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3121                         }
3122                         break;          
3123                 case OP_ABS:
3124                         if (use_sse2) {
3125                                 EMIT_SSE2_FPFUNC (code, fabs, ins->dreg, ins->sreg1);
3126                         }
3127                         else
3128                                 amd64_fabs (code);
3129                         break;          
3130                 case OP_TAN: {
3131                         /* 
3132                          * it really doesn't make sense to inline all this code;
3133                          * it's here just to show that things may not be as simple 
3134                          * as they appear.
3135                          */
3136                         guchar *check_pos, *end_tan, *pop_jump;
3137                         if (use_sse2)
3138                                 g_assert_not_reached ();
3139                         amd64_push_reg (code, AMD64_RAX);
3140                         amd64_fptan (code);
3141                         amd64_fnstsw (code);
3142                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3143                         check_pos = code;
3144                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3145                         amd64_fstp (code, 0); /* pop the 1.0 */
3146                         end_tan = code;
3147                         x86_jump8 (code, 0);
3148                         amd64_fldpi (code);
3149                         amd64_fp_op (code, X86_FADD, 0);
3150                         amd64_fxch (code, 1);
3151                         x86_fprem1 (code);
3152                         amd64_fstsw (code);
3153                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3154                         pop_jump = code;
3155                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3156                         amd64_fstp (code, 1);
3157                         amd64_fptan (code);
3158                         amd64_patch (pop_jump, code);
3159                         amd64_fstp (code, 0); /* pop the 1.0 */
3160                         amd64_patch (check_pos, code);
3161                         amd64_patch (end_tan, code);
3162                         amd64_fldz (code);
3163                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3164                         amd64_pop_reg (code, AMD64_RAX);
3165                         break;
3166                 }
3167                 case OP_ATAN:
3168                         if (use_sse2)
3169                                 g_assert_not_reached ();
3170                         x86_fld1 (code);
3171                         amd64_fpatan (code);
3172                         amd64_fldz (code);
3173                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3174                         break;          
3175                 case OP_SQRT:
3176                         if (use_sse2) {
3177                                 EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
3178                         }
3179                         else
3180                                 amd64_fsqrt (code);
3181                         break;          
3182                 case OP_X86_FPOP:
3183                         if (!use_sse2)
3184                                 amd64_fstp (code, 0);
3185                         break;          
3186                 case OP_FREM: {
3187                         guint8 *l1, *l2;
3188
3189                         if (use_sse2)
3190                                 g_assert_not_reached ();
3191                         amd64_push_reg (code, AMD64_RAX);
3192                         /* we need to exchange ST(0) with ST(1) */
3193                         amd64_fxch (code, 1);
3194
3195                         /* this requires a loop, because fprem sometimes 
3196                          * returns a partial remainder */
3197                         l1 = code;
3198                         /* looks like MS is using fprem instead of the IEEE-compatible fprem1 */
3199                         /* x86_fprem1 (code); */
3200                         amd64_fprem (code);
3201                         amd64_fnstsw (code);
3202                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2);
3203                         l2 = code + 2;
3204                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3205
3206                         /* pop result */
3207                         amd64_fstp (code, 1);
3208
3209                         amd64_pop_reg (code, AMD64_RAX);
3210                         break;
3211                 }
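                /*
                 * fprem sets the C2 status-word bit while the reduction is still
                 * partial, hence the loop above, roughly:
                 *
                 *   l1: fprem
                 *       fnstsw %ax
                 *       and  $C2, %eax
                 *       jnz  l1
                 */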
3212                 case OP_FCOMPARE:
3213                         if (use_sse2) {
3214                                 /* 
3215                                  * The two arguments are swapped so that the resulting flags match
3216                                  * what the fbranch opcodes expect in the non-SSE (x87) case.
3217                                  */
3218                                 amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3219                                 break;
3220                         }
3221                         if (cfg->opt & MONO_OPT_FCMOV) {
3222                                 amd64_fcomip (code, 1);
3223                                 amd64_fstp (code, 0);
3224                                 break;
3225                         }
3226                         /* this overwrites EAX */
3227                         EMIT_FPCOMPARE(code);
3228                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3229                         break;
3230                 case OP_FCEQ:
3231                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3232                                 /* zeroing the register at the start results in 
3233                                  * shorter and faster code (we can also remove the widening op)
3234                                  */
3235                                 guchar *unordered_check;
3236                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3237                                 
3238                                 if (use_sse2)
3239                                         amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
3240                                 else {
3241                                         amd64_fcomip (code, 1);
3242                                         amd64_fstp (code, 0);
3243                                 }
3244                                 unordered_check = code;
3245                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3246                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3247                                 amd64_patch (unordered_check, code);
3248                                 break;
3249                         }
3250                         if (ins->dreg != AMD64_RAX) 
3251                                 amd64_push_reg (code, AMD64_RAX);
3252
3253                         EMIT_FPCOMPARE(code);
3254                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3255                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3256                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3257                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3258
3259                         if (ins->dreg != AMD64_RAX) 
3260                                 amd64_pop_reg (code, AMD64_RAX);
3261                         break;
3262                 case OP_FCLT:
3263                 case OP_FCLT_UN:
3264                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3265                                 /* zeroing the register at the start results in 
3266                                  * shorter and faster code (we can also remove the widening op)
3267                                  */
3268                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3269                                 if (use_sse2)
3270                                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3271                                 else {
3272                                         amd64_fcomip (code, 1);
3273                                         amd64_fstp (code, 0);
3274                                 }
3275                                 if (ins->opcode == OP_FCLT_UN) {
3276                                         guchar *unordered_check = code;
3277                                         guchar *jump_to_end;
3278                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3279                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3280                                         jump_to_end = code;
3281                                         x86_jump8 (code, 0);
3282                                         amd64_patch (unordered_check, code);
3283                                         amd64_inc_reg (code, ins->dreg);
3284                                         amd64_patch (jump_to_end, code);
3285                                 } else {
3286                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3287                                 }
3288                                 break;
3289                         }
3290                         if (ins->dreg != AMD64_RAX) 
3291                                 amd64_push_reg (code, AMD64_RAX);
3292
3293                         EMIT_FPCOMPARE(code);
3294                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3295                         if (ins->opcode == OP_FCLT_UN) {
3296                                 guchar *is_not_zero_check, *end_jump;
3297                                 is_not_zero_check = code;
3298                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3299                                 end_jump = code;
3300                                 x86_jump8 (code, 0);
3301                                 amd64_patch (is_not_zero_check, code);
3302                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3303
3304                                 amd64_patch (end_jump, code);
3305                         }
3306                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3307                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3308
3309                         if (ins->dreg != AMD64_RAX) 
3310                                 amd64_pop_reg (code, AMD64_RAX);
3311                         break;
3312                 case OP_FCGT:
3313                 case OP_FCGT_UN:
3314                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3315                                 /* zeroing the register at the start results in 
3316                                  * shorter and faster code (we can also remove the widening op)
3317                                  */
3318                                 guchar *unordered_check;
3319                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3320                                 if (use_sse2)
3321                                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3322                                 else {
3323                                         amd64_fcomip (code, 1);
3324                                         amd64_fstp (code, 0);
3325                                 }
3326                                 if (ins->opcode == OP_FCGT) {
3327                                         unordered_check = code;
3328                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3329                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3330                                         amd64_patch (unordered_check, code);
3331                                 } else {
3332                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3333                                 }
3334                                 break;
3335                         }
3336                         if (ins->dreg != AMD64_RAX) 
3337                                 amd64_push_reg (code, AMD64_RAX);
3338
3339                         EMIT_FPCOMPARE(code);
3340                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3341                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3342                         if (ins->opcode == OP_FCGT_UN) {
3343                                 guchar *is_not_zero_check, *end_jump;
3344                                 is_not_zero_check = code;
3345                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3346                                 end_jump = code;
3347                                 x86_jump8 (code, 0);
3348                                 amd64_patch (is_not_zero_check, code);
3349                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3350
3351                                 amd64_patch (end_jump, code);
3352                         }
3353                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3354                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3355
3356                         if (ins->dreg != AMD64_RAX) 
3357                                 amd64_pop_reg (code, AMD64_RAX);
3358                         break;
3359                 case OP_FCLT_MEMBASE:
3360                 case OP_FCGT_MEMBASE:
3361                 case OP_FCLT_UN_MEMBASE:
3362                 case OP_FCGT_UN_MEMBASE:
3363                 case OP_FCEQ_MEMBASE: {
3364                         guchar *unordered_check, *jump_to_end;
3365                         int x86_cond;
3366                         g_assert (use_sse2);
3367
3368                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3369                         amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
3370
3371                         switch (ins->opcode) {
3372                         case OP_FCEQ_MEMBASE:
3373                                 x86_cond = X86_CC_EQ;
3374                                 break;
3375                         case OP_FCLT_MEMBASE:
3376                         case OP_FCLT_UN_MEMBASE:
3377                                 x86_cond = X86_CC_LT;
3378                                 break;
3379                         case OP_FCGT_MEMBASE:
3380                         case OP_FCGT_UN_MEMBASE:
3381                                 x86_cond = X86_CC_GT;
3382                                 break;
3383                         default:
3384                                 g_assert_not_reached ();
3385                         }
3386
3387                         unordered_check = code;
3388                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3389                         amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
3390
3391                         switch (ins->opcode) {
3392                         case OP_FCEQ_MEMBASE:
3393                         case OP_FCLT_MEMBASE:
3394                         case OP_FCGT_MEMBASE:
3395                                 amd64_patch (unordered_check, code);
3396                                 break;
3397                         case OP_FCLT_UN_MEMBASE:
3398                         case OP_FCGT_UN_MEMBASE:
3399                                 jump_to_end = code;
3400                                 x86_jump8 (code, 0);
3401                                 amd64_patch (unordered_check, code);
3402                                 amd64_inc_reg (code, ins->dreg);
3403                                 amd64_patch (jump_to_end, code);
3404                                 break;
3405                         default:
3406                                 break;
3407                         }
3408                         break;
3409                 }
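                /*
                 * In the unordered (_UN) membase variants above, an unordered compare
                 * (PF set) must yield "true", so the parity branch skips the setcc
                 * and increments the already-zeroed destination instead.
                 */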
3410                 case OP_FBEQ:
3411                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3412                                 guchar *jump = code;
3413                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3414                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3415                                 amd64_patch (jump, code);
3416                                 break;
3417                         }
3418                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3419                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3420                         break;
3421                 case OP_FBNE_UN:
3422                         /* Branch if C013 != 100 */
3423                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3424                                 /* branch if !ZF or (PF|CF) */
3425                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3426                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3427                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3428                                 break;
3429                         }
3430                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
3431                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3432                         break;
3433                 case OP_FBLT:
3434                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3435                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3436                                 break;
3437                         }
3438                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3439                         break;
3440                 case OP_FBLT_UN:
3441                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3442                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3443                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3444                                 break;
3445                         }
3446                         if (ins->opcode == OP_FBLT_UN) {
3447                                 guchar *is_not_zero_check, *end_jump;
3448                                 is_not_zero_check = code;
3449                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3450                                 end_jump = code;
3451                                 x86_jump8 (code, 0);
3452                                 amd64_patch (is_not_zero_check, code);
3453                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3454
3455                                 amd64_patch (end_jump, code);
3456                         }
3457                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3458                         break;
3459                 case OP_FBGT:
3460                 case OP_FBGT_UN:
3461                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3462                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3463                                 break;
3464                         }
3465                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3466                         if (ins->opcode == OP_FBGT_UN) {
3467                                 guchar *is_not_zero_check, *end_jump;
3468                                 is_not_zero_check = code;
3469                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3470                                 end_jump = code;
3471                                 x86_jump8 (code, 0);
3472                                 amd64_patch (is_not_zero_check, code);
3473                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3474
3475                                 amd64_patch (end_jump, code);
3476                         }
3477                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3478                         break;
3479                 case OP_FBGE:
3480                         /* Branch if C013 == 100 or 001 */
3481                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3482                                 guchar *br1;
3483
3484                                 /* skip branch if C1=1 */
3485                                 br1 = code;
3486                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3487                                 /* branch if (C0 | C3) = 1 */
3488                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3489                                 amd64_patch (br1, code);
3490                                 break;
3491                         }
3492                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3493                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3494                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
3495                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3496                         break;
3497                 case OP_FBGE_UN:
3498                         /* Branch if C013 == 000 */
3499                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3500                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3501                                 break;
3502                         }
3503                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3504                         break;
3505                 case OP_FBLE:
3506                         /* Branch if C013=000 or 100 */
3507                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3508                                 guchar *br1;
3509
3510                                 /* skip branch if C1=1 */
3511                                 br1 = code;
3512                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3513                                 /* branch if C0=0 */
3514                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3515                                 amd64_patch (br1, code);
3516                                 break;
3517                         }
3518                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1));
3519                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
3520                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3521                         break;
3522                 case OP_FBLE_UN:
3523                         /* Branch if C013 != 001 */
3524                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3525                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3526                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3527                                 break;
3528                         }
3529                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3530                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3531                         break;
3532                 case CEE_CKFINITE: {
3533                         if (use_sse2) {
3534                                 /* Transfer value to the fp stack */
3535                                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
3536                                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
3537                                 amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
3538                         }
3539                         amd64_push_reg (code, AMD64_RAX);
3540                         amd64_fxam (code);
3541                         amd64_fnstsw (code);
3542                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
3543                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3544                         amd64_pop_reg (code, AMD64_RAX);
3545                         if (use_sse2) {
3546                                 amd64_fstp (code, 0);
3547                         }                               
3548                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3549                         if (use_sse2)
3550                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
3551                         break;
3552                 }
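                /*
                 * CEE_CKFINITE classifies the value with fxam: after masking the
                 * FPU status word with C0|C3 (0x4100), both NaN and infinity leave
                 * exactly C0 set, which is what the ArithmeticException check above
                 * tests for.
                 */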
3553                 case OP_TLS_GET: {
3554                         x86_prefix (code, X86_FS_PREFIX);
3555                         amd64_mov_reg_mem (code, ins->dreg, ins->inst_offset, 8);
3556                         break;
3557                 }
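                /*
                 * OP_TLS_GET above compiles down to a single segment-relative load,
                 * roughly:
                 *
                 *     mov %fs:<inst_offset>, %dreg
                 *
                 * On Linux/amd64 with NPTL the thread descriptor is addressed
                 * through %fs, so thread-local slots can be read without any call.
                 */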
3558                 case OP_ATOMIC_ADD_I4:
3559                 case OP_ATOMIC_ADD_I8: {
3560                         int dreg = ins->dreg;
3561                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;
3562
3563                         if (dreg == ins->inst_basereg)
3564                                 dreg = AMD64_R11;
3565                         
3566                         if (dreg != ins->sreg2)
3567                                 amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
3568
3569                         x86_prefix (code, X86_LOCK_PREFIX);
3570                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
3571
3572                         if (dreg != ins->dreg)
3573                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
3574
3575                         break;
3576                 }
3577                 case OP_ATOMIC_ADD_NEW_I4:
3578                 case OP_ATOMIC_ADD_NEW_I8: {
3579                         int dreg = ins->dreg;
3580                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_NEW_I4) ? 4 : 8;
3581
3582                         if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
3583                                 dreg = AMD64_R11;
3584
3585                         amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
3586                         amd64_prefix (code, X86_LOCK_PREFIX);
3587                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
3588                         /* dreg contains the old value, add with sreg2 value */
3589                         amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);
3590                         
3591                         if (ins->dreg != dreg)
3592                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
3593
3594                         break;
3595                 }
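                /*
                 * Both atomic add variants above are built on "lock xadd", which
                 * atomically exchanges and adds: afterwards dreg holds the old
                 * memory value while the memory word holds old + dreg.
                 * OP_ATOMIC_ADD therefore returns the old value, and
                 * OP_ATOMIC_ADD_NEW adds sreg2 back in to return the new one.
                 */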
3596                 case OP_ATOMIC_EXCHANGE_I4:
3597                 case OP_ATOMIC_EXCHANGE_I8: {
3598                         guchar *br[2];
3599                         int sreg2 = ins->sreg2;
3600                         int breg = ins->inst_basereg;
3601                         guint32 size = (ins->opcode == OP_ATOMIC_EXCHANGE_I4) ? 4 : 8;
3602
3603                         /* 
3604                          * See http://msdn.microsoft.com/msdnmag/issues/0700/Win32/ for
3605                          * an explanation of how this works.
3606                          */
3607
3608                         /* cmpxchg uses eax as the comparand, so we need to make sure we can use it
3609                          * hack to overcome limits in x86 reg allocator 
3610                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3611                          */
3612                         if (ins->dreg != AMD64_RAX)
3613                                 amd64_push_reg (code, AMD64_RAX);
3614                         
3615                         /* We need the EAX reg for the cmpxchg */
3616                         if (ins->sreg2 == AMD64_RAX) {
3617                                 amd64_push_reg (code, AMD64_RDX);
3618                                 amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
3619                                 sreg2 = AMD64_RDX;
3620                         }
3621
3622                         if (breg == AMD64_RAX) {
3623                                 amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, size);
3624                                 breg = AMD64_R11;
3625                         }
3626
3627                         amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
3628
3629                         br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
3630                         amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
3631                         br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
3632                         amd64_patch (br [1], br [0]);
3633
3634                         if (ins->dreg != AMD64_RAX) {
3635                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
3636                                 amd64_pop_reg (code, AMD64_RAX);
3637                         }
3638
3639                         if (ins->sreg2 != sreg2)
3640                                 amd64_pop_reg (code, AMD64_RDX);
3641
3642                         break;
3643                 }
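                /*
                 * The exchange is a compare-and-swap loop: %rax is loaded with the
                 * current memory value, then "lock cmpxchg" stores sreg2 only if
                 * the memory still equals %rax. On failure cmpxchg has reloaded
                 * %rax with the current value, so the loop simply retries until it
                 * succeeds.
                 */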
3644                 default:
3645                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3646                         g_assert_not_reached ();
3647                 }
3648
3649                 if ((code - cfg->native_code - offset) > max_len) {
3650                         g_warning ("wrong maximum instruction length for instruction %s (expected %d, got %ld)",
3651                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3652                         g_assert_not_reached ();
3653                 }
3654                
3655                 cpos += max_len;
3656
3657                 last_ins = ins;
3658                 last_offset = offset;
3659                 
3660                 ins = ins->next;
3661         }
3662
3663         cfg->code_len = code - cfg->native_code;
3664 }
3665
3666 void
3667 mono_arch_register_lowlevel_calls (void)
3668 {
3669 }
3670
3671 void
3672 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3673 {
3674         MonoJumpInfo *patch_info;
3675         gboolean compile_aot = !run_cctors;
3676
3677         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3678                 unsigned char *ip = patch_info->ip.i + code;
3679                 const unsigned char *target;
3680
3681                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3682
3683                 if (compile_aot) {
3684                         switch (patch_info->type) {
3685                         case MONO_PATCH_INFO_BB:
3686                         case MONO_PATCH_INFO_LABEL:
3687                                 break;
3688                         default:
3689                                 /* No need to patch these */
3690                                 continue;
3691                         }
3692                 }
3693
3694                 switch (patch_info->type) {
3695                 case MONO_PATCH_INFO_NONE:
3696                         continue;
3697                 case MONO_PATCH_INFO_CLASS_INIT: {
3698                         /* Might already have been changed to a nop */
3699                         guint8* ip2 = ip;
3700                         amd64_call_code (ip2, 0);
3701                         break;
3702                 }
3703                 case MONO_PATCH_INFO_METHOD_REL:
3704                 case MONO_PATCH_INFO_R8:
3705                 case MONO_PATCH_INFO_R4:
3706                         g_assert_not_reached ();
3707                         continue;
3708                 case MONO_PATCH_INFO_BB:
3709                         break;
3710                 default:
3711                         break;
3712                 }
3713                 amd64_patch (ip, (gpointer)target);
3714         }
3715 }
3716
3717 guint8 *
3718 mono_arch_emit_prolog (MonoCompile *cfg)
3719 {
3720         MonoMethod *method = cfg->method;
3721         MonoBasicBlock *bb;
3722         MonoMethodSignature *sig;
3723         MonoInst *inst;
3724         int alloc_size, pos, max_offset, i, quad;
3725         guint8 *code;
3726         CallInfo *cinfo;
3727
3728         cfg->code_size =  MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
3729         code = cfg->native_code = g_malloc (cfg->code_size);
3730
3731         amd64_push_reg (code, AMD64_RBP);
3732         amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
3733
3734         /* Stack alignment check */
3735 #if 0
3736         {
3737                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
3738                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
3739                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
3740                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
3741                 amd64_breakpoint (code);
3742         }
3743 #endif
3744
3745         alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
3746         pos = 0;
3747
3748         if (method->save_lmf) {
3749                 gint32 lmf_offset;
3750
3751                 pos = ALIGN_TO (pos + sizeof (MonoLMF), 16);
3752
3753                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, pos);
3754
3755                 lmf_offset = - cfg->arch.lmf_offset;
3756
3757                 /* Save ip */
3758                 amd64_lea_membase (code, AMD64_R11, AMD64_RIP, 0);
3759                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, 8);
3760                 /* Save fp */
3761                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), AMD64_RBP, 8);
3762                 /* Save method */
3763                 /* FIXME: add a relocation for this */
3764                 if (IS_IMM32 (cfg->method))
3765                         amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), (guint64)cfg->method, 8);
3766                 else {
3767                         amd64_mov_reg_imm (code, AMD64_R11, cfg->method);
3768                         amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 8);
3769                 }
3770                 /* Save callee saved regs */
3771                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
3772                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
3773                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
3774                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
3775                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
3776         } else {
3777
3778                 for (i = 0; i < AMD64_NREG; ++i)
3779                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3780                                 amd64_push_reg (code, i);
3781                                 pos += sizeof (gpointer);
3782                         }
3783         }
3784
3785         alloc_size -= pos;
3786
3787         if (alloc_size) {
3788                 /* See mono_emit_stack_alloc */
3789 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3790                 guint32 remaining_size = alloc_size;
3791                 while (remaining_size >= 0x1000) {
3792                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
3793                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
3794                         remaining_size -= 0x1000;
3795                 }
3796                 if (remaining_size)
3797                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
3798 #else
3799                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
3800 #endif
3801         }
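        /*
         * On platforms with a guard page, frames larger than a page are
         * committed 0x1000 bytes at a time: each SUB is followed by a dummy
         * TEST through the new stack top, so the guard pages are touched in
         * order and the OS can grow the stack.
         */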
3802
3803         /* compute max_offset in order to use short forward jumps */
3804         max_offset = 0;
3805         if (cfg->opt & MONO_OPT_BRANCH) {
3806                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3807                         MonoInst *ins = bb->code;
3808                         bb->max_offset = max_offset;
3809
3810                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3811                                 max_offset += 6;
3812                         /* max alignment for loops */
3813                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3814                                 max_offset += LOOP_ALIGNMENT;
3815
3816                         while (ins) {
3817                                 if (ins->opcode == OP_LABEL)
3818                                         ins->inst_c1 = max_offset;
3819                                 
3820                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3821                                 ins = ins->next;
3822                         }
3823                 }
3824         }
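        /*
         * These are conservative upper bounds on each basic block's native
         * offset: a forward branch whose target block is known to be within
         * 127 bytes can then be emitted with a 1-byte displacement.
         */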
3825
3826         sig = mono_method_signature (method);
3827         pos = 0;
3828
3829         cinfo = get_call_info (sig, FALSE);
3830
3831         if (sig->ret->type != MONO_TYPE_VOID) {
3832                 if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
3833                         /* Spill the incoming return value register to the stack slot of cfg->ret */
3834                         amd64_mov_membase_reg (code, cfg->ret->inst_basereg, cfg->ret->inst_offset, cinfo->ret.reg, 8);
3835                 }
3836         }
3837
3838         /* Keep this in sync with emit_load_volatile_arguments */
3839         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3840                 ArgInfo *ainfo = cinfo->args + i;
3841                 gint32 stack_offset;
3842                 MonoType *arg_type;
3843                 inst = cfg->varinfo [i];
3844
3845                 if (sig->hasthis && (i == 0))
3846                         arg_type = &mono_defaults.object_class->byval_arg;
3847                 else
3848                         arg_type = sig->params [i - sig->hasthis];
3849
3850                 stack_offset = ainfo->offset + ARGS_OFFSET;
3851
3852                 /* Save volatile arguments to the stack */
3853                 if (inst->opcode != OP_REGVAR) {
3854                         switch (ainfo->storage) {
3855                         case ArgInIReg: {
3856                                 guint32 size = 8;
3857
3858                                 /* FIXME: I1 etc */
3859                                 /*
3860                                 if (stack_offset & 0x1)
3861                                         size = 1;
3862                                 else if (stack_offset & 0x2)
3863                                         size = 2;
3864                                 else if (stack_offset & 0x4)
3865                                         size = 4;
3866                                 else
3867                                         size = 8;
3868                                 */
3869                                 amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg, size);
3870                                 break;
3871                         }
3872                         case ArgInFloatSSEReg:
3873                                 amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
3874                                 break;
3875                         case ArgInDoubleSSEReg:
3876                                 amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
3877                                 break;
3878                         case ArgValuetypeInReg:
3879                                 for (quad = 0; quad < 2; quad ++) {
3880                                         switch (ainfo->pair_storage [quad]) {
3881                                         case ArgInIReg:
3882                                                 amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer));
3883                                                 break;
3884                                         case ArgInFloatSSEReg:
3885                                                 amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
3886                                                 break;
3887                                         case ArgInDoubleSSEReg:
3888                                                 amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
3889                                                 break;
3890                                         case ArgNone:
3891                                                 break;
3892                                         default:
3893                                                 g_assert_not_reached ();
3894                                         }
3895                                 }
3896                                 break;
3897                         default:
3898                                 break;
3899                         }
3900                 }
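                /*
                 * ArgValuetypeInReg arguments arrive split into up to two
                 * eightbyte "quads", each in its own integer or SSE register per
                 * the amd64 ABI; they are spilled quad by quad into consecutive
                 * slots of the local.
                 */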
3901
3902                 if (inst->opcode == OP_REGVAR) {
3903                         /* Argument allocated to (non-volatile) register */
3904                         switch (ainfo->storage) {
3905                         case ArgInIReg:
3906                                 amd64_mov_reg_reg (code, inst->dreg, ainfo->reg, 8);
3907                                 break;
3908                         case ArgOnStack:
3909                                 amd64_mov_reg_membase (code, inst->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
3910                                 break;
3911                         default:
3912                                 g_assert_not_reached ();
3913                         }
3914                 }
3915         }
3916
3917         if (method->save_lmf) {
3918                 gint32 lmf_offset;
3919
3920                 if (lmf_tls_offset != -1) {
3921                         /* Load lmf quickly using the FS register */
3922                         x86_prefix (code, X86_FS_PREFIX);
3923                         amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
3924                 }
3925                 else {
3926                         /* 
3927                          * The call might clobber argument registers, but they are already
3928                          * saved to the stack/global regs.
3929                          */
3930
3931                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3932                                                                  (gpointer)"mono_get_lmf_addr");                
3933                 }
3934
3935                 lmf_offset = - cfg->arch.lmf_offset;
3936
3937                 /* Save lmf_addr */
3938                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
3939                 /* Save previous_lmf */
3940                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
3941                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
3942                 /* Set new lmf */
3943                 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset);
3944                 amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
3945         }
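        /*
         * Linking the LMF amounts to a list push: previous_lmf <- *lmf_addr,
         * then *lmf_addr <- &lmf, so the runtime can walk the chain of managed
         * frames starting from the thread's lmf_addr. The epilog pops the
         * entry again by storing previous_lmf back through lmf_addr.
         */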
3946
3947
3948         g_free (cinfo);
3949
3950         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3951                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3952
3953         cfg->code_len = code - cfg->native_code;
3954
3955         g_assert (cfg->code_len < cfg->code_size);
3956
3957         return code;
3958 }
3959
3960 void
3961 mono_arch_emit_epilog (MonoCompile *cfg)
3962 {
3963         MonoMethod *method = cfg->method;
3964         int quad, pos, i;
3965         guint8 *code;
3966         int max_epilog_size = 16;
3967         CallInfo *cinfo;
3968         
3969         if (cfg->method->save_lmf)
3970                 max_epilog_size += 256;
3971         
3972         if (mono_jit_trace_calls != NULL)
3973                 max_epilog_size += 50;
3974
3975         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3976                 max_epilog_size += 50;
3977
3978         max_epilog_size += (AMD64_NREG * 2);
3979
3980         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3981                 cfg->code_size *= 2;
3982                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3983                 mono_jit_stats.code_reallocs++;
3984         }
3985
3986         code = cfg->native_code + cfg->code_len;
3987
3988         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3989                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3990
3991         /* the code restoring the registers must be kept in sync with CEE_JMP */
3992         pos = 0;
3993         
3994         if (method->save_lmf) {
3995                 gint32 lmf_offset = - cfg->arch.lmf_offset;
3996
3997                 /* Restore previous lmf */
3998                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
3999                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
4000                 amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);
4001
4002                 /* Restore callee saved regs */
4003                 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
4004                         amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
4005                 }
4006                 if (cfg->used_int_regs & (1 << AMD64_R12)) {
4007                         amd64_mov_reg_membase (code, AMD64_R12, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
4008                 }
4009                 if (cfg->used_int_regs & (1 << AMD64_R13)) {
4010                         amd64_mov_reg_membase (code, AMD64_R13, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
4011                 }
4012                 if (cfg->used_int_regs & (1 << AMD64_R14)) {
4013                         amd64_mov_reg_membase (code, AMD64_R14, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
4014                 }
4015                 if (cfg->used_int_regs & (1 << AMD64_R15)) {
4016                         amd64_mov_reg_membase (code, AMD64_R15, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
4017                 }
4018         } else {
4019
4020                 for (i = 0; i < AMD64_NREG; ++i)
4021                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4022                                 pos -= sizeof (gpointer);
4023
4024                 if (pos) {
4025                         if (pos == - sizeof (gpointer)) {
4026                                 /* Only one register, so avoid lea */
4027                                 for (i = AMD64_NREG - 1; i > 0; --i)
4028                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4029                                                 amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
4030                                         }
4031                         }
4032                         else {
4033                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4034
4035                                 /* Pop registers in reverse order */
4036                                 for (i = AMD64_NREG - 1; i > 0; --i)
4037                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4038                                                 amd64_pop_reg (code, i);
4039                                         }
4040                         }
4041                 }
4042         }
4043
4044         /* Load returned vtypes into registers if needed */
4045         cinfo = get_call_info (mono_method_signature (method), FALSE);
4046         if (cinfo->ret.storage == ArgValuetypeInReg) {
4047                 ArgInfo *ainfo = &cinfo->ret;
4048                 MonoInst *inst = cfg->ret;
4049
4050                 for (quad = 0; quad < 2; quad ++) {
4051                         switch (ainfo->pair_storage [quad]) {
4052                         case ArgInIReg:
4053                                 amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer));
4054                                 break;
4055                         case ArgInFloatSSEReg:
4056                                 amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
4057                                 break;
4058                         case ArgInDoubleSSEReg:
4059                                 amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
4060                                 break;
4061                         case ArgNone:
4062                                 break;
4063                         default:
4064                                 g_assert_not_reached ();
4065                         }
4066                 }
4067         }
4068         g_free (cinfo);
4069
4070         amd64_leave (code);
4071         amd64_ret (code);
4072
4073         cfg->code_len = code - cfg->native_code;
4074
4075         g_assert (cfg->code_len < cfg->code_size);
4076
4077 }
4078
4079 void
4080 mono_arch_emit_exceptions (MonoCompile *cfg)
4081 {
4082         MonoJumpInfo *patch_info;
4083         int nthrows, i;
4084         guint8 *code;
4085         MonoClass *exc_classes [16];
4086         guint8 *exc_throw_start [16], *exc_throw_end [16];
4087         guint32 code_size = 0;
4088
4089         /* Compute needed space */
4090         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4091                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4092                         code_size += 40;
4093                 if (patch_info->type == MONO_PATCH_INFO_R8)
4094                         code_size += 8 + 7; /* sizeof (double) + alignment */
4095                 if (patch_info->type == MONO_PATCH_INFO_R4)
4096                         code_size += 4 + 7; /* sizeof (float) + alignment */
4097         }
4098
4099         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4100                 cfg->code_size *= 2;
4101                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4102                 mono_jit_stats.code_reallocs++;
4103         }
4104
4105         code = cfg->native_code + cfg->code_len;
4106
4107         /* add code to raise exceptions */
4108         nthrows = 0;
4109         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4110                 switch (patch_info->type) {
4111                 case MONO_PATCH_INFO_EXC: {
4112                         MonoClass *exc_class;
4113                         guint8 *buf, *buf2;
4114                         guint32 throw_ip;
4115
4116                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
4117
4118                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4119                         g_assert (exc_class);
4120                         throw_ip = patch_info->ip.i;
4121
4122                         //x86_breakpoint (code);
4123                         /* Find a throw sequence for the same exception class */
4124                         for (i = 0; i < nthrows; ++i)
4125                                 if (exc_classes [i] == exc_class)
4126                                         break;
4127                         if (i < nthrows) {
4128                                 amd64_mov_reg_imm (code, AMD64_RSI, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4129                                 x86_jump_code (code, exc_throw_start [i]);
4130                                 patch_info->type = MONO_PATCH_INFO_NONE;
4131                         }
4132                         else {
4133                                 buf = code;
4134                                 amd64_mov_reg_imm_size (code, AMD64_RSI, 0xf0f0f0f0, 4);
4135                                 buf2 = code;
4136
4137                                 if (nthrows < 16) {
4138                                         exc_classes [nthrows] = exc_class;
4139                                         exc_throw_start [nthrows] = code;
4140                                 }
4141
4142                                 amd64_mov_reg_imm (code, AMD64_RDI, exc_class->type_token);
4143                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4144                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4145                                 patch_info->ip.i = code - cfg->native_code;
4146
4147                                 if (cfg->compile_aot) {
4148                                         amd64_mov_reg_membase (code, GP_SCRATCH_REG, AMD64_RIP, 0, 8);
4149                                         amd64_call_reg (code, GP_SCRATCH_REG);
4150                                 } else {
4151                                         /* The callee is in memory allocated using the code manager */
4152                                         amd64_call_code (code, 0);
4153                                 }
4154
4155                                 amd64_mov_reg_imm (buf, AMD64_RSI, (code - cfg->native_code) - throw_ip);
4156                                 while (buf < buf2)
4157                                         x86_nop (buf);
4158
4159                                 if (nthrows < 16) {
4160                                         exc_throw_end [nthrows] = code;
4161                                         nthrows ++;
4162                                 }
4163                         }
4164                         break;
4165                 }
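                /*
                 * Throw sequences are cached per exception class (up to 16
                 * classes): a later throw site for the same class only loads its
                 * own IP offset into %rsi and jumps to the existing sequence,
                 * which passes the class token in %rdi to
                 * mono_arch_throw_corlib_exception.
                 */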
4166                 default:
4167                         /* do nothing */
4168                         break;
4169                 }
4170         }
4171
4172         /* Handle relocations with RIP relative addressing */
4173         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4174                 gboolean remove = FALSE;
4175
4176                 switch (patch_info->type) {
4177                 case MONO_PATCH_INFO_R8: {
4178                         guint8 *pos;
4179
4180                         code = (guint8*)ALIGN_TO (code, 8);
4181
4182                         pos = cfg->native_code + patch_info->ip.i;
4183
4184                         *(double*)code = *(double*)patch_info->data.target;
4185
4186                         if (use_sse2)
4187                                 *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
4188                         else
4189                                 *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4190                         code += 8;
4191
4192                         remove = TRUE;
4193                         break;
4194                 }
4195                 case MONO_PATCH_INFO_R4: {
4196                         guint8 *pos;
4197
4198                         code = (guint8*)ALIGN_TO (code, 8);
4199
4200                         pos = cfg->native_code + patch_info->ip.i;
4201
4202                         *(float*)code = *(float*)patch_info->data.target;
4203
4204                         if (use_sse2)
4205                                 *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
4206                         else
4207                                 *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4208                         code += 4;
4209
4210                         remove = TRUE;
4211                         break;
4212                 }
4213                 default:
4214                         break;
4215                 }
4216
4217                 if (remove) {
4218                         if (patch_info == cfg->patch_info)
4219                                 cfg->patch_info = patch_info->next;
4220                         else {
4221                                 MonoJumpInfo *tmp;
4222
4223                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
4224                                         ;
4225                                 tmp->next = patch_info->next;
4226                         }
4227                 }
4228         }
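        /*
         * Floating point constants referenced with RIP-relative addressing are
         * emitted here, after the method body, and the 32-bit displacement
         * field of the referencing instruction is back-patched with the
         * distance; the field sits at a different offset in the SSE and x87
         * load encodings, hence the two cases above.
         */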
4229
4230         cfg->code_len = code - cfg->native_code;
4231
4232         g_assert (cfg->code_len < cfg->code_size);
4233
4234 }
4235
4236 void*
4237 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4238 {
4239         guchar *code = p;
4240         CallInfo *cinfo = NULL;
4241         MonoMethodSignature *sig;
4242         MonoInst *inst;
4243         int i, n, stack_area = 0;
4244
4245         /* Keep this in sync with mono_arch_get_argument_info */
4246
4247         if (enable_arguments) {
4248                 /* Allocate a new area on the stack and save arguments there */
4249                 sig = mono_method_signature (cfg->method);
4250
4251                 cinfo = get_call_info (sig, FALSE);
4252
4253                 n = sig->param_count + sig->hasthis;
4254
4255                 stack_area = ALIGN_TO (n * 8, 16);
4256
4257                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
4258
4259                 for (i = 0; i < n; ++i) {
4260                         inst = cfg->varinfo [i];
4261
4262                         if (inst->opcode == OP_REGVAR)
4263                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
4264                         else {
4265                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
4266                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
4267                         }
4268                 }
4269         }
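        /*
         * The tracing function is called with the method in %rdi (materialized
         * via the METHODCONST patch below) and a pointer to the saved argument
         * area in %rsi.
         */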
4270
4271         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4272         amd64_set_reg_template (code, AMD64_RDI);
4273         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RSP, 8);
4274         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4275
4276         if (enable_arguments) {
4277                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
4278
4279                 g_free (cinfo);
4280         }
4281
4282         return code;
4283 }
4284
4285 enum {
4286         SAVE_NONE,
4287         SAVE_STRUCT,
4288         SAVE_EAX,
4289         SAVE_EAX_EDX,
4290         SAVE_XMM
4291 };
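/*
 * Where the return value of the traced method lives while the leave hook
 * runs: in %rax (SAVE_EAX), in %xmm0 (SAVE_XMM), in a caller-provided
 * buffer (SAVE_STRUCT), or nowhere (SAVE_NONE). SAVE_EAX_EDX is not needed
 * on amd64, where a 64 bit result fits in a single register.
 */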
4292
4293 void*
4294 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4295 {
4296         guchar *code = p;
4297         int save_mode = SAVE_NONE;
4298         MonoMethod *method = cfg->method;
4299         int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
4300         
4301         switch (rtype) {
4302         case MONO_TYPE_VOID:
4303                 /* special case string .ctor icall */
4304                 if (strcmp (".ctor", method->name) == 0 && method->klass == mono_defaults.string_class)
4305                         save_mode = SAVE_EAX;
4306                 else
4307                         save_mode = SAVE_NONE;
4308                 break;
4309         case MONO_TYPE_I8:
4310         case MONO_TYPE_U8:
4311                 save_mode = SAVE_EAX;
4312                 break;
4313         case MONO_TYPE_R4:
4314         case MONO_TYPE_R8:
4315                 save_mode = SAVE_XMM;
4316                 break;
4317         case MONO_TYPE_VALUETYPE:
4318                 save_mode = SAVE_STRUCT;
4319                 break;
4320         default:
4321                 save_mode = SAVE_EAX;
4322                 break;
4323         }
4324
4325         /* Save the result and copy it into the proper argument register */
4326         switch (save_mode) {
4327         case SAVE_EAX:
4328                 amd64_push_reg (code, AMD64_RAX);
4329                 /* Align stack */
4330                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4331                 if (enable_arguments)
4332                         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RAX, 8);
4333                 break;
4334         case SAVE_STRUCT:
4335                 /* FIXME: */
4336                 if (enable_arguments)
4337                         amd64_mov_reg_imm (code, AMD64_RSI, 0);
4338                 break;
4339         case SAVE_XMM:
4340                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4341                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
4342                 /* Align stack */
4343                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4344                 /* 
4345                  * The result is already in the proper argument register so no copying
4346                  * needed.
4347                  */
4348                 break;
4349         case SAVE_NONE:
4350                 break;
4351         default:
4352                 g_assert_not_reached ();
4353         }
4354
4355         /* Set %al, which holds the number of SSE registers used, since this is a varargs call */
4356         if (save_mode == SAVE_XMM)
4357                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
4358         else
4359                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
4360
4361         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
4362         amd64_set_reg_template (code, AMD64_RDI);
4363         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4364
4365         /* Restore result */
4366         switch (save_mode) {
4367         case SAVE_EAX:
4368                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4369                 amd64_pop_reg (code, AMD64_RAX);
4370                 break;
4371         case SAVE_STRUCT:
4372                 /* FIXME: */
4373                 break;
4374         case SAVE_XMM:
4375                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4376                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
4377                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4378                 break;
4379         case SAVE_NONE:
4380                 break;
4381         default:
4382                 g_assert_not_reached ();
4383         }
4384
4385         return code;
4386 }
4387
4388 void
4389 mono_arch_flush_icache (guint8 *code, gint size)
4390 {
4391         /* Not needed */
4392 }
4393
4394 void
4395 mono_arch_flush_register_windows (void)
4396 {
4397 }
4398
4399 gboolean 
4400 mono_arch_is_inst_imm (gint64 imm)
4401 {
4402         return amd64_is_imm32 (imm);
4403 }
4404
4405 #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
4406
4407 static int reg_to_ucontext_reg [] = {
4408         REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
4409         REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
4410         REG_RIP
4411 };
4412
4413 /*
4414  * Determine whether the trap whose info is in SIGINFO is caused by
4415  * integer overflow.
4416  */
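/*
 * On amd64 the #DE fault raised by idiv covers both division by zero and
 * INT_MIN / -1 overflow. The two are told apart by decoding the faulting
 * instruction (0xF7 with a modrm reg field of 7 is IDIV r/m, possibly
 * preceded by a REX prefix) and checking whether the divisor register
 * holds -1.
 */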
4417 gboolean
4418 mono_arch_is_int_overflow (void *sigctx, void *info)
4419 {
4420         ucontext_t *ctx = (ucontext_t*)sigctx;
4421         guint8* rip;
4422         int reg;
4423
4424         rip = (guint8*)ctx->uc_mcontext.gregs [REG_RIP];
4425
4426         if (IS_REX (rip [0])) {
4427                 reg = amd64_rex_b (rip [0]);
4428                 rip ++;
4429         }
4430         else
4431                 reg = 0;
4432
4433         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
4434                 /* idiv REG */
4435                 reg += x86_modrm_rm (rip [1]);
4436
4437                 if (ctx->uc_mcontext.gregs [reg_to_ucontext_reg [reg]] == -1)
4438                         return TRUE;
4439         }
4440
4441         return FALSE;
4442 }
4443
4444 guint32
4445 mono_arch_get_patch_offset (guint8 *code)
4446 {
4447         return 3;
4448 }
4449
4450 gpointer*
4451 mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
4452 {
4453         guint32 reg;
4454         guint32 disp;
4455         guint8 rex = 0;
4456
4457         /* go to the start of the call instruction
4458          *
4459          * address_byte = (m << 6) | (o << 3) | reg
4460          * call opcode: 0xff address_byte displacement
4461          * 0xff m=1,o=2 imm8
4462          * 0xff m=2,o=2 imm32
4463          */
4464         code -= 7;
4465
4466         /* 
4467          * A given byte sequence can match more than one case here, so we have to be
4468          * really careful about the ordering of the cases. Longer sequences
4469          * come first.
4470          */
4471         if ((code [0] == 0x41) && (code [1] == 0xff) && (code [2] == 0x15)) {
4472                 /* call OFFSET(%rip) */
4473                 disp = *(guint32*)(code + 3);
4474                 return (gpointer*)(code + disp + 7);
4475         }
4476         else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) {
4477                 /* call *[reg+disp32] */
4478                 if (IS_REX (code [0]))
4479                         rex = code [0];
4480                 reg = amd64_modrm_rm (code [2]);
4481                 disp = *(guint32*)(code + 3);
4482                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4483         }
4484         else if (code [2] == 0xe8) {
4485                 /* call <ADDR> */
4486                 return NULL;
4487         }
4488         else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) {
4489                 /* call *%reg */
4490                 return NULL;
4491         }
4492         else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) {
4493                 /* call *[reg+disp8] */
4494                 if (IS_REX (code [3]))
4495                         rex = code [3];
4496                 reg = amd64_modrm_rm (code [5]);
4497                 disp = *(guint8*)(code + 6);
4498                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4499         }
4500         else if ((code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
4501                         /*
4502                          * This is an interface call; we should check that the cases above can't match it first
4503                          * 8b 40 30   mov    0x30(%eax),%eax
4504                          * ff 10      call   *(%eax)
4505                          */
4506                 if (IS_REX (code [4]))
4507                         rex = code [4];
4508                 reg = amd64_modrm_rm (code [6]);
4509                 disp = 0;
4510         }
4511         else
4512                 g_assert_not_reached ();
4513
4514         reg += amd64_rex_b (rex);
4515
4516         /* R11 is clobbered by the trampoline code */
4517         g_assert (reg != AMD64_R11);
4518
4519         return (gpointer)(((guint64)(regs [reg])) + disp);
4520 }
4521
4522 gpointer*
4523 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4524 {
4525         guint32 reg;
4526         guint32 disp;
4527
4528         code -= 10;
4529
4530         if (IS_REX (code [0]) && (code [1] == 0x8b) && (code [3] == 0x48) && (code [4] == 0x8b) && (code [5] == 0x40) && (code [7] == 0x48) && (code [8] == 0xff) && (code [9] == 0xd0)) {
4531                 /* mov REG, %rax; mov <OFFSET>(%rax), %rax; call *%rax */
4532                 reg = amd64_rex_b (code [0]) + amd64_modrm_rm (code [2]);
4533                 disp = code [6];
4534
4535                 if (reg == AMD64_RAX)
4536                         return NULL;
4537                 else
4538                         return (gpointer*)(((guint64)(regs [reg])) + disp);
4539         }
4540
4541         return NULL;
4542 }
4543
4544 /*
4545  * Support for fast access to the thread-local lmf structure using the FS
4546  * segment register on NPTL + kernel 2.6.x.
4547  */
4548
4549 static gboolean tls_offset_inited = FALSE;
4550
4551 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4552
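/*
 * Stack overflow is reported as a managed exception on this platform:
 * setup_stack () records the thread's stack boundaries and installs an
 * alternate signal stack with sigaltstack (), so the SIGSEGV handler can
 * still run once the normal stack has been exhausted.
 */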
4553 static void
4554 setup_stack (MonoJitTlsData *tls)
4555 {
4556         pthread_t self = pthread_self();
4557         pthread_attr_t attr;
4558         size_t stsize = 0;
4559         struct sigaltstack sa;
4560         guint8 *staddr = NULL;
4561         guint8 *current = (guint8*)&staddr;
4562
4563         if (mono_running_on_valgrind ())
4564                 return;
4565
4566         /* Determine stack boundaries */
4567 #ifdef HAVE_PTHREAD_GETATTR_NP
4568         pthread_getattr_np( self, &attr );
4569 #else
4570 #ifdef HAVE_PTHREAD_ATTR_GET_NP
4571         pthread_attr_get_np( self, &attr );
4572 #elif defined(sun)
4573         pthread_attr_init( &attr );
4574         pthread_attr_getstacksize( &attr, &stsize );
4575 #else
4576 #error "Not implemented"
4577 #endif
4578 #endif
4579 #ifndef sun
4580         pthread_attr_getstack( &attr, (void**)&staddr, &stsize );
4581 #endif
4582
4583         g_assert (staddr);
4584
4585         g_assert ((current > staddr) && (current < staddr + stsize));
4586
4587         tls->end_of_stack = staddr + stsize;
4588
4589         /*
4590          * threads created by nptl do not seem to have a guard page, and
4591          * since the main thread is not created by us, we can't even set one.
4592          * Increasing stsize fools the SIGSEGV signal handler into thinking this
4593          * is a stack overflow exception.
4594          */
4595         tls->stack_size = stsize + getpagesize ();
4596
4597         /* Setup an alternate signal stack */
4598         tls->signal_stack = mmap (0, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4599         tls->signal_stack_size = SIGNAL_STACK_SIZE;
4600
4601         g_assert (tls->signal_stack);
4602
4603         sa.ss_sp = tls->signal_stack;
4604         sa.ss_size = SIGNAL_STACK_SIZE;
4605         sa.ss_flags = SS_ONSTACK;
4606         sigaltstack (&sa, NULL);
4607 }
4608
4609 #endif
4610
4611 void
4612 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4613 {
4614         if (!tls_offset_inited) {
4615                 tls_offset_inited = TRUE;
4616
4617                 appdomain_tls_offset = mono_domain_get_tls_offset ();
4618                 lmf_tls_offset = mono_get_lmf_tls_offset ();
4619                 thread_tls_offset = mono_thread_get_tls_offset ();
4620         }               
4621
4622 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4623         setup_stack (tls);
4624 #endif
4625 }
4626
4627 void
4628 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4629 {
4630 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4631         struct sigaltstack sa;
4632
4633         sa.ss_sp = tls->signal_stack;
4634         sa.ss_size = SIGNAL_STACK_SIZE;
4635         sa.ss_flags = SS_DISABLE;
4636         sigaltstack  (&sa, NULL);
4637
4638         if (tls->signal_stack)
4639                 munmap (tls->signal_stack, SIGNAL_STACK_SIZE);
4640 #endif
4641 }
4642
4643 void
4644 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4645 {
4646         MonoCallInst *call = (MonoCallInst*)inst;
4647         int out_reg = param_regs [0];
4648
4649         if (vt_reg != -1) {
4650                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4651                 MonoInst *vtarg;
4652
4653                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4654                         /*
4655                          * The valuetype is in RAX:RDX after the call, need to be copied to
4656                          * the stack. Push the address here, so the call instruction can
4657                          * access it.
4658                          */
4659                         MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
4660                         vtarg->sreg1 = vt_reg;
4661                         mono_bblock_add_inst (cfg->cbb, vtarg);
4662
4663                         /* Align stack */
4664                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
4665                 }
4666                 else {
4667                         MONO_INST_NEW (cfg, vtarg, OP_SETREG);
4668                         vtarg->sreg1 = vt_reg;
4669                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4670                         mono_bblock_add_inst (cfg->cbb, vtarg);
4671
4672                         mono_call_inst_add_outarg_reg (call, vtarg->dreg, out_reg, FALSE);
4673
4674                         out_reg = param_regs [1];
4675                 }
4676
4677                 g_free (cinfo);
4678         }
4679
4680         /* add the this argument */
4681         if (this_reg != -1) {
4682                 MonoInst *this;
4683                 MONO_INST_NEW (cfg, this, OP_SETREG);
4684                 this->type = this_type;
4685                 this->sreg1 = this_reg;
4686                 this->dreg = mono_regstate_next_int (cfg->rs);
4687                 mono_bblock_add_inst (cfg->cbb, this);
4688
4689                 mono_call_inst_add_outarg_reg (call, this->dreg, out_reg, FALSE);
4690         }
4691 }

MonoInst*
mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
        MonoInst *ins = NULL;

        if (cmethod->klass == mono_defaults.math_class) {
                if (strcmp (cmethod->name, "Sin") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_SIN);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Cos") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_COS);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Tan") == 0) {
                        /* OP_TAN is x87 only, so bail out (ins is still NULL) under SSE2 */
                        if (use_sse2)
                                return ins;
                        MONO_INST_NEW (cfg, ins, OP_TAN);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Atan") == 0) {
                        /* Likewise, OP_ATAN is x87 only */
                        if (use_sse2)
                                return ins;
                        MONO_INST_NEW (cfg, ins, OP_ATAN);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Sqrt") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_SQRT);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
                        MONO_INST_NEW (cfg, ins, OP_ABS);
                        ins->inst_i0 = args [0];
                }
#if 0
                /* OP_FREM is not IEEE compatible */
                else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_FREM);
                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                }
#endif
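
                /*
                 * Note: Sqrt stays available under SSE2, since OP_SQRT maps
                 * directly to the sqrtsd instruction, e.g. code of the shape
                 *
                 *      sqrtsd %xmm0, %xmm0
                 *
                 * while Tan and Atan exist only as x87 opcodes (fptan/fpatan),
                 * which is why those branches above bail out with a NULL ins
                 * when use_sse2 is set.
                 */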
        } else if (cmethod->klass->image == mono_defaults.corlib &&
                           (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
                           (strcmp (cmethod->klass->name, "Interlocked") == 0)) {

                if (strcmp (cmethod->name, "Increment") == 0) {
                        MonoInst *ins_iconst;
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_ADD_NEW_I4;
                        else if (fsig->params [0]->type == MONO_TYPE_I8)
                                opcode = OP_ATOMIC_ADD_NEW_I8;
                        else
                                g_assert_not_reached ();
                        MONO_INST_NEW (cfg, ins, opcode);
                        MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
                        ins_iconst->inst_c0 = 1;

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = ins_iconst;
                } else if (strcmp (cmethod->name, "Decrement") == 0) {
                        MonoInst *ins_iconst;
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_ADD_NEW_I4;
                        else if (fsig->params [0]->type == MONO_TYPE_I8)
                                opcode = OP_ATOMIC_ADD_NEW_I8;
                        else
                                g_assert_not_reached ();
                        MONO_INST_NEW (cfg, ins, opcode);
                        MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
                        ins_iconst->inst_c0 = -1;

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = ins_iconst;
                } else if (strcmp (cmethod->name, "Add") == 0) {
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_ADD_I4;
                        else if (fsig->params [0]->type == MONO_TYPE_I8)
                                opcode = OP_ATOMIC_ADD_I8;
                        else
                                g_assert_not_reached ();

                        MONO_INST_NEW (cfg, ins, opcode);

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                } else if (strcmp (cmethod->name, "Exchange") == 0) {
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_EXCHANGE_I4;
                        else if ((fsig->params [0]->type == MONO_TYPE_I8) ||
                                         (fsig->params [0]->type == MONO_TYPE_I) ||
                                         (fsig->params [0]->type == MONO_TYPE_OBJECT))
                                /* MONO_TYPE_I and MONO_TYPE_OBJECT are pointer-sized on amd64 */
                                opcode = OP_ATOMIC_EXCHANGE_I8;
                        else
                                return NULL;

                        MONO_INST_NEW (cfg, ins, opcode);

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                } else if (strcmp (cmethod->name, "Read") == 0 && (fsig->params [0]->type == MONO_TYPE_I8)) {
                        /* Naturally aligned 64 bit reads are already atomic on amd64 */
                        MONO_INST_NEW (cfg, ins, CEE_LDIND_I8);
                        ins->inst_i0 = args [0];
                }

                /*
                 * Can't implement the CompareExchange methods this way, since
                 * they take three arguments.
                 */
        }

        return ins;
}
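
/*
 * For illustration only (not compiled): the Interlocked opcodes selected
 * above expand to locked x86 read-modify-write instructions. In terms of
 * GCC's atomic builtins (an analogy, not what the JIT actually emits):
 */
#if 0
static glong
interlocked_increment_sketch (glong *dest)
{
        /* OP_ATOMIC_ADD_NEW_I8: lock xadd, then add 1 to produce the new value */
        return __sync_add_and_fetch (dest, 1);
}

static glong
interlocked_exchange_sketch (glong *dest, glong val)
{
        /* OP_ATOMIC_EXCHANGE_I8: xchg, which is implicitly locked */
        return __sync_lock_test_and_set (dest, val);
}

/*
 * CompareExchange would map to lock cmpxchg, but as noted above it takes
 * three arguments, which this two-operand tree hook cannot express.
 */
#endif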

gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
        return FALSE;
}

MonoInst*
mono_arch_get_domain_intrinsic (MonoCompile *cfg)
{
        MonoInst *ins;

        if (appdomain_tls_offset == -1)
                return NULL;

        MONO_INST_NEW (cfg, ins, OP_TLS_GET);
        ins->inst_offset = appdomain_tls_offset;
        return ins;
}

MonoInst*
mono_arch_get_thread_intrinsic (MonoCompile *cfg)
{
        MonoInst *ins;

        if (thread_tls_offset == -1)
                return NULL;

        MONO_INST_NEW (cfg, ins, OP_TLS_GET);
        ins->inst_offset = thread_tls_offset;
        return ins;
}
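
/*
 * For illustration only: OP_TLS_GET with a known offset is emitted as a
 * single fs-segment load on amd64, roughly of the shape
 *
 *      movq %fs:<offset>, %rax
 *
 * which is why both intrinsics above return NULL when the corresponding
 * TLS offset could not be determined at startup.
 */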