/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *   Zoltan Varga (vargaz@gmail.com)
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-mmap.h>

#include "trace.h"
#include "ir-emit.h"
#include "mini-amd64.h"
#include "cpu-amd64.h"
#include "debugger-agent.h"

/*
 * Can't define this in mini-amd64.h because that would turn on the generic code in
 * method-to-ir.c.
 */
#define MONO_ARCH_IMT_REG AMD64_R11

static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;

/*
 * Referenced by add_valuetype () below but declared nowhere else in this
 * section; assumed to default to FALSE (vtypes in registers stay enabled).
 */
static gboolean disable_vtypes_in_regs = FALSE;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
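
/*
 * Illustrative expansions (not part of the original source):
 *
 *   ALIGN_TO (13, 8) == 16 and ALIGN_TO (16, 8) == 16, i.e. round up to
 *   the next multiple of the (power of two) alignment.
 *   IS_IMM32 (0x7fffffffLL) is TRUE, IS_IMM32 (1LL << 32) is FALSE.
 *   IS_REX (0x48) is TRUE (a REX.W prefix), IS_REX (0x8b) is FALSE.
 */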

#ifdef HOST_WIN32
/* Under Windows, the calling convention is never stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (FALSE)
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

/* This mutex protects architecture-specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];

/*
 * The code generated for sequence points reads from this location, which is
 * made read-only when single stepping is enabled.
 */
static gpointer ss_trigger_page;

/* Enabled breakpoints read from this trigger page */
static gpointer bp_trigger_page;

/* The size of the breakpoint sequence */
static int breakpoint_size;

/* The size of the breakpoint instruction causing the actual fault */
static int breakpoint_fault_size;

/* The size of the single step instruction causing the actual fault */
static int single_step_fault_size;

#ifdef HOST_WIN32
/* On Win64, 32 bytes of shadow space for the first four arguments are always reserved on top of the return address and saved frame pointer */
#define ARGS_OFFSET 48
#else
#define ARGS_OFFSET 16
#endif
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64-bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * UNORDERED        1  1  1
 */
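
/*
 * Worked example (illustrative): after a 'comisd' of A against B, the
 * 'ja' branch (taken when CF == 0 and ZF == 0) fires only for A > B, so
 * NaN operands -- which set ZF, PF and CF -- correctly fall through.
 */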

const char*
mono_arch_regname (int reg)
{
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static const char * packed_xmmregs [] = {
        "p:xmm0", "p:xmm1", "p:xmm2", "p:xmm3", "p:xmm4", "p:xmm5", "p:xmm6", "p:xmm7", "p:xmm8",
        "p:xmm9", "p:xmm10", "p:xmm11", "p:xmm12", "p:xmm13", "p:xmm14", "p:xmm15"
};

static const char * single_xmmregs [] = {
        "s:xmm0", "s:xmm1", "s:xmm2", "s:xmm3", "s:xmm4", "s:xmm5", "s:xmm6", "s:xmm7", "s:xmm8",
        "s:xmm9", "s:xmm10", "s:xmm11", "s:xmm12", "s:xmm13", "s:xmm14", "s:xmm15"
};

const char*
mono_arch_fregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return single_xmmregs [reg];
        else
                return "unknown";
}

const char *
mono_arch_xregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return packed_xmmregs [reg];
        else
                return "unknown";
}

G_GNUC_UNUSED static void
break_count (void)
{
}

G_GNUC_UNUSED static gboolean
debug_count (void)
{
        static int count = 0;
        count ++;

        if (!getenv ("COUNT"))
                return TRUE;

        if (count == atoi (getenv ("COUNT"))) {
                break_count ();
        }

        if (count > atoi (getenv ("COUNT"))) {
                return FALSE;
        }

        return TRUE;
}
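
/*
 * Typical usage (a sketch, not from the original source): guard a suspect
 * transformation with debug_count () and bisect for the first failing
 * occurrence via the COUNT environment variable:
 *
 *   if (debug_count ())
 *           apply_risky_opt (cfg);   // hypothetical helper
 *
 * With COUNT=n, the guard returns FALSE from the (n+1)-th call onwards,
 * and break_count () provides a convenient breakpoint target at call n.
 */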

static gboolean
debug_omit_fp (void)
{
#if 0
        return debug_count ();
#else
        return TRUE;
#endif
}

static inline gboolean
amd64_is_near_call (guint8 *code)
{
        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        return code [0] == 0xe8;
}

static inline void
amd64_patch (unsigned char* code, gpointer target)
{
        guint8 rex = 0;

        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f)) {
                rex = code [0];
                code += 1;
        }

        if ((code [0] & 0xf8) == 0xb8) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) {
                /* mov 0(%rip), %dreg */
                *(guint32*)(code + 2) = (guint32)(guint64)target - 7;
        }
        else if ((code [0] == 0xff) && (code [1] == 0x15)) {
                /* call *<OFFSET>(%rip) */
                *(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
        }
        else if (code [0] == 0xe8) {
                /* call <DISP> */
                gint64 disp = (guint8*)target - (guint8*)code;
                g_assert (amd64_is_imm32 (disp));
                x86_patch (code, (unsigned char*)target);
        }
        else
                x86_patch (code, (unsigned char*)target);
}
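
/*
 * Example of a patchable sequence (illustrative): amd64_set_reg_template
 * for %r11 emits 49 bb xx xx xx xx xx xx xx xx, i.e. a REX.WB prefix, the
 * 0xb8+reg opcode and a 64-bit immediate; amd64_patch () skips the REX
 * byte, recognizes the 0xb8 opcode family and rewrites the 8-byte
 * immediate in place with the new target.
 */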

void
mono_amd64_patch (unsigned char* code, gpointer target)
{
        amd64_patch (code, target);
}

typedef enum {
        ArgInIReg,
        ArgInFloatSSEReg,
        ArgInDoubleSSEReg,
        ArgOnStack,
        ArgValuetypeInReg,
        ArgValuetypeAddrInIReg,
        ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
        gint16 offset;
        gint8  reg;
        ArgStorage storage;

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];
        gint8 pair_regs [2];
} ArgInfo;

typedef struct {
        int nargs;
        guint32 stack_usage;
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
        gboolean vtype_retaddr;
        /* The index of the vret arg in the argument list */
        int vret_arg_index;
        ArgInfo ret;
        ArgInfo sig_cookie;
        ArgInfo args [1];
} CallInfo;

#define DEBUG(a) if (cfg->verbose_level > 1) a

#ifdef HOST_WIN32
#define PARAM_REGS 4

static AMD64_Reg_No param_regs [] = { AMD64_RCX, AMD64_RDX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#else
#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#endif

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}
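
/*
 * For reference (System V AMD64, illustrative): the first six integer
 * arguments land in %rdi, %rsi, %rdx, %rcx, %r8 and %r9 in that order, so
 * for a call such as f (a, b, c) add_general () assigns a->%rdi, b->%rsi,
 * c->%rdx and leaves stack_size untouched; the seventh integer argument
 * onwards goes to the stack, 8 bytes per slot. On Win64 the same walk
 * uses %rcx, %rdx, %r8 and %r9.
 */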

#ifdef HOST_WIN32
#define FLOAT_PARAM_REGS 4
#else
#define FLOAT_PARAM_REGS 8
#endif

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                /* A double register */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}
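
/*
 * Likewise for floats (illustrative): on System V the first eight
 * floating point arguments occupy %xmm0-%xmm7, so f (1.0, 2.0f) gets
 * %xmm0 (ArgInDoubleSSEReg) and %xmm1 (ArgInFloatSSEReg); on Win64 only
 * %xmm0-%xmm3 are available and the register index is shared with the
 * integer registers (see the gr/fr synchronization in get_call_info ()).
 */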

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2 = ARG_CLASS_NO_CLASS;
        MonoType *ptype;

        ptype = mini_type_get_underlying_type (NULL, type);
        switch (ptype->type) {
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_CHAR:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
#ifdef HOST_WIN32
                class2 = ARG_CLASS_INTEGER;
#else
                class2 = ARG_CLASS_SSE;
#endif
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (ptype)) {
                        class2 = ARG_CLASS_INTEGER;
                        break;
                }
                /* fall through */
        case MONO_TYPE_VALUETYPE: {
                MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
                int i;

                for (i = 0; i < info->num_fields; ++i) {
                        class2 = class1;
                        class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                }
                break;
        }
        default:
                g_assert_not_reached ();
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}
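
/*
 * Classification example (illustrative, per the System V ABI rules above):
 *
 *   struct { int a; int b; }  -- both fields merge to ARG_CLASS_INTEGER
 *   struct { double d; }      -- merges to ARG_CLASS_SSE
 *
 * Mixing an int with a double in the same eightbyte yields INTEGER, since
 * INTEGER wins over SSE in the merge rules above.
 */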

static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                           gboolean is_return,
                           guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, quad, nquads, i;
        ArgumentClass args [2];
        MonoMarshalType *info = NULL;
        MonoClass *klass;
        MonoGenericSharingContext tmp_gsctx;
        gboolean pass_on_stack = FALSE;

        /*
         * The gsctx currently contains no data, it is only used for checking whether
         * open types are allowed, some callers like mono_arch_get_argument_info ()
         * don't pass it to us, so work around that.
         */
        if (!gsctx)
                gsctx = &tmp_gsctx;

        klass = mono_class_from_mono_type (type);
        size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);
#ifndef HOST_WIN32
        if (!sig->pinvoke && !disable_vtypes_in_regs && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
                /* We pass and return vtypes of size 8 in a register */
        } else if (!sig->pinvoke || (size == 0) || (size > 16)) {
                pass_on_stack = TRUE;
        }
#else
        if (!sig->pinvoke) {
                pass_on_stack = TRUE;
        }
#endif

        if (pass_on_stack) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        /* FIXME: Handle structs smaller than 8 bytes */
        //if ((size % 8) != 0)
        //      NOT_IMPLEMENTED;

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        if (!sig->pinvoke) {
                /* Always pass in 1 or 2 integer registers */
                args [0] = ARG_CLASS_INTEGER;
                args [1] = ARG_CLASS_INTEGER;
                /* Only the simplest cases are supported */
                if (is_return && nquads != 1) {
                        args [0] = ARG_CLASS_MEMORY;
                        args [1] = ARG_CLASS_MEMORY;
                }
        } else {
                /*
                 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
                 * The X87 and SSEUP stuff is left out since there are no such types in
                 * the CLR.
                 */
                info = mono_marshal_load_type_info (klass);
                g_assert (info);

#ifndef HOST_WIN32
                if (info->native_size > 16) {
                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = ArgOnStack;

                        return;
                }
#else
                switch (info->native_size) {
                case 1: case 2: case 4: case 8:
                        break;
                default:
                        if (is_return) {
                                ainfo->storage = ArgOnStack;
                                ainfo->offset = *stack_size;
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        }
                        else {
                                ainfo->storage = ArgValuetypeAddrInIReg;

                                if (*gr < PARAM_REGS) {
                                        ainfo->pair_storage [0] = ArgInIReg;
                                        ainfo->pair_regs [0] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                else {
                                        ainfo->pair_storage [0] = ArgOnStack;
                                        ainfo->offset = *stack_size;
                                        *stack_size += 8;
                                }
                        }

                        return;
                }
#endif

                args [0] = ARG_CLASS_NO_CLASS;
                args [1] = ARG_CLASS_NO_CLASS;
                for (quad = 0; quad < nquads; ++quad) {
                        int size;
                        guint32 align;
                        ArgumentClass class1;

                        if (info->num_fields == 0)
                                class1 = ARG_CLASS_MEMORY;
                        else
                                class1 = ARG_CLASS_NO_CLASS;
                        for (i = 0; i < info->num_fields; ++i) {
                                size = mono_marshal_type_size (info->fields [i].field->type,
                                                                                           info->fields [i].mspec,
                                                                                           &align, TRUE, klass->unicode);
                                if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
                                        /* Unaligned field */
                                        NOT_IMPLEMENTED;
                                }

                                /* Skip fields in other quad */
                                if ((quad == 0) && (info->fields [i].offset >= 8))
                                        continue;
                                if ((quad == 1) && (info->fields [i].offset < 8))
                                        continue;

                                class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
                        }
                        g_assert (class1 != ARG_CLASS_NO_CLASS);
                        args [quad] = class1;
                }
        }

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        if (sig->pinvoke)
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        else
                                *stack_size += nquads * sizeof (gpointer);
                        ainfo->storage = ArgOnStack;
                }
        }
}
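
/*
 * End-to-end example (illustrative): on System V, a 16 byte
 * struct { long a; double b; } passed by value through a pinvoke
 * signature is split into two quads, classified INTEGER and SSE, and
 * ends up as ArgValuetypeInReg with pair_storage = { ArgInIReg,
 * ArgInDoubleSSEReg }, consuming one general purpose and one SSE
 * register; if either register class is exhausted the whole struct
 * falls back to ArgOnStack.
 */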

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
        guint32 i, gr, fr, pstart;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;

        if (mp)
                cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
        else
                cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        cinfo->nargs = n;

        gr = 0;
        fr = 0;

        /* return value */
        {
                ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
                switch (ret_type->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_STRING:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_R4:
                        cinfo->ret.storage = ArgInFloatSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_R8:
                        cinfo->ret.storage = ArgInDoubleSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (ret_type)) {
                                cinfo->ret.storage = ArgInIReg;
                                cinfo->ret.reg = AMD64_RAX;
                                break;
                        }
                        /* fall through */
                case MONO_TYPE_VALUETYPE: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                        add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                        if (cinfo->ret.storage == ArgOnStack) {
                                cinfo->vtype_retaddr = TRUE;
                                /* The caller passes the address where the value is stored */
                        }
                        break;
                }
                case MONO_TYPE_TYPEDBYREF:
                        /* Same as a valuetype with size 24 */
                        cinfo->vtype_retaddr = TRUE;
                        break;
                case MONO_TYPE_VOID:
                        break;
                default:
                        g_error ("Can't handle as return value 0x%x", sig->ret->type);
                }
        }

        pstart = 0;
        /*
         * To simplify get_this_arg_reg () and LLVM integration, emit the vret arg after
         * the first argument, allowing 'this' to be always passed in the first arg reg.
         * Also do this if the first argument is a reference type, since virtual calls
         * are sometimes made using calli without sig->hasthis set, like in the delegate
         * invoke wrappers.
         */
        if (cinfo->vtype_retaddr && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (sig->params [0])))) {
                if (sig->hasthis) {
                        add_general (&gr, &stack_size, cinfo->args + 0);
                } else {
                        add_general (&gr, &stack_size, &cinfo->args [sig->hasthis + 0]);
                        pstart = 1;
                }
                add_general (&gr, &stack_size, &cinfo->ret);
                cinfo->vret_arg_index = 1;
        } else {
                /* this */
                if (sig->hasthis)
                        add_general (&gr, &stack_size, cinfo->args + 0);

                if (cinfo->vtype_retaddr)
                        add_general (&gr, &stack_size, &cinfo->ret);
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = pstart; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

#ifdef HOST_WIN32
                /* The float param registers and other param registers must be the same index on Windows x64. */
                if (gr > fr)
                        fr = gr;
                else if (fr > gr)
                        gr = fr;
#endif

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
                switch (ptype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (ptype)) {
                                add_general (&gr, &stack_size, ainfo);
                                break;
                        }
                        /* fall through */
                case MONO_TYPE_VALUETYPE:
                        add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_TYPEDBYREF:
#ifdef HOST_WIN32
                        add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
#else
                        stack_size += sizeof (MonoTypedRef);
                        ainfo->storage = ArgOnStack;
#endif
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

#ifdef HOST_WIN32
        /* There are always 32 bytes reserved on the stack when calling on Win64 */
        stack_size += 0x20;
#endif

        if (stack_size & 0x8) {
                /* The AMD64 ABI requires each stack frame to be 16 byte aligned */
                cinfo->need_stack_align = TRUE;
                stack_size += 8;
        }

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}
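
/*
 * Usage sketch (illustrative): for a managed signature like
 * 'int f (object o, double d, long l)' on System V, get_call_info ()
 * yields ret in %rax (ArgInIReg), o in %rdi, d in %xmm0
 * (ArgInDoubleSSEReg) and l in %rsi, with stack_usage == 0; callers such
 * as mono_arch_allocate_vars () then turn these ArgInfos into variable
 * locations.
 */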

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;
        CallInfo *cinfo = get_call_info (NULL, NULL, csig, FALSE);
        guint32 args_size = cinfo->stack_usage;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        g_free (cinfo);

        return args_size;
}

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
#ifndef _MSC_VER
        __asm__ __volatile__ ("cpuid"
                : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
                : "a" (id));
#else
        int info[4];
        __cpuid(info, id);
        *p_eax = info[0];
        *p_ebx = info[1];
        *p_ecx = info[2];
        *p_edx = info[3];
#endif
        return 1;
}
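
/*
 * Example (illustrative): cpuid (1, ...) fills EDX/ECX with the feature
 * flags tested below -- EDX bit 25 is SSE, EDX bit 26 is SSE2, ECX bit 0
 * is SSE3 -- while leaf 0x80000000 reports the highest extended leaf in
 * EAX together with the vendor signature used for the SSE4a check.
 */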

/*
 * Initialize the CPU to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
#ifndef _MSC_VER
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
        /* TODO: This is crashing on Win64 right now.
         * _control87 (_PC_53, MCW_PC);
         */
#endif
}

/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
        int flags;

        InitializeCriticalSection (&mini_arch_mutex);

#ifdef MONO_ARCH_NOMAP32BIT
        flags = MONO_MMAP_READ;
        /* amd64_mov_reg_imm () + amd64_mov_reg_membase () */
        breakpoint_size = 13;
        breakpoint_fault_size = 3;
        /* amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R11, 0, 0, 4); */
        single_step_fault_size = 5;
#else
        flags = MONO_MMAP_READ|MONO_MMAP_32BIT;
        /* amd64_mov_reg_mem () */
        breakpoint_size = 8;
        breakpoint_fault_size = 8;
        single_step_fault_size = 8;
#endif

        ss_trigger_page = mono_valloc (NULL, mono_pagesize (), flags);
        bp_trigger_page = mono_valloc (NULL, mono_pagesize (), flags);
        mono_mprotect (bp_trigger_page, mono_pagesize (), 0);

        mono_aot_register_jit_icall ("mono_amd64_throw_exception", mono_amd64_throw_exception);
        mono_aot_register_jit_icall ("mono_amd64_throw_corlib_exception", mono_amd64_throw_corlib_exception);
        mono_aot_register_jit_icall ("mono_amd64_get_original_ip", mono_amd64_get_original_ip);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
        DeleteCriticalSection (&mini_arch_mutex);
}

/*
 * This function returns the optimizations supported on this CPU.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
        int eax, ebx, ecx, edx;
        guint32 opts = 0;

        /* FIXME: AMD64 */

        *exclude_mask = 0;
        /* Feature Flags function, flags returned in EDX. */
        if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
                if (edx & (1 << 15)) {
                        opts |= MONO_OPT_CMOV;
                        if (edx & 1)
                                opts |= MONO_OPT_FCMOV;
                        else
                                *exclude_mask |= MONO_OPT_FCMOV;
                } else
                        *exclude_mask |= MONO_OPT_CMOV;
        }

        return opts;
}
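
/*
 * For reference (CPUID leaf 1, illustrative): EDX bit 15 is the CMOV
 * flag and EDX bit 0 signals an on-chip x87 FPU; FCMOV requires both,
 * which is why MONO_OPT_FCMOV is only enabled inside the CMOV branch
 * above.
 */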

/*
 * This function tests for all the supported SSE versions.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
        int eax, ebx, ecx, edx;
        guint32 sse_opts = 0;

        if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
                if (edx & (1 << 25))
                        sse_opts |= SIMD_VERSION_SSE1;
                if (edx & (1 << 26))
                        sse_opts |= SIMD_VERSION_SSE2;
                if (ecx & (1 << 0))
                        sse_opts |= SIMD_VERSION_SSE3;
                if (ecx & (1 << 9))
                        sse_opts |= SIMD_VERSION_SSSE3;
                if (ecx & (1 << 19))
                        sse_opts |= SIMD_VERSION_SSE41;
                if (ecx & (1 << 20))
                        sse_opts |= SIMD_VERSION_SSE42;
        }

        /* Yes, all this needs to be done to check for sse4a.
           See: "AMD: CPUID Specification"
         */
        if (cpuid (0x80000000, &eax, &ebx, &ecx, &edx)) {
                /* eax greater than or equal to 0x80000001, ebx = 'htuA', ecx = 'DMAc', edx = 'itne' */
                if ((((unsigned int) eax) >= 0x80000001) && (ebx == 0x68747541) && (ecx == 0x444D4163) && (edx == 0x69746E65)) {
                        cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
                        if (ecx & (1 << 6))
                                sse_opts |= SIMD_VERSION_SSE4a;
                }
        }

        return sse_opts;
}

#ifndef DISABLE_JIT

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                if (mono_is_regsize_var (ins->inst_vtype)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}

/**
 * mono_arch_compute_omit_fp:
 *
 *   Determine whether the frame pointer can be eliminated.
 */
static void
mono_arch_compute_omit_fp (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        int i, locals_size;
        CallInfo *cinfo;

        if (cfg->arch.omit_fp_computed)
                return;

        header = cfg->header;

        sig = mono_method_signature (cfg->method);

        if (!cfg->arch.cinfo)
                cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
        cinfo = cfg->arch.cinfo;

        /*
         * FIXME: Remove some of the restrictions.
         */
        cfg->arch.omit_fp = TRUE;
        cfg->arch.omit_fp_computed = TRUE;

        if (cfg->disable_omit_fp)
                cfg->arch.omit_fp = FALSE;

        if (!debug_omit_fp ())
                cfg->arch.omit_fp = FALSE;
        /*
        if (cfg->method->save_lmf)
                cfg->arch.omit_fp = FALSE;
        */
        if (cfg->flags & MONO_CFG_HAS_ALLOCA)
                cfg->arch.omit_fp = FALSE;
        if (header->num_clauses)
                cfg->arch.omit_fp = FALSE;
        if (cfg->param_area)
                cfg->arch.omit_fp = FALSE;
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                cfg->arch.omit_fp = FALSE;
        if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
                (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
                cfg->arch.omit_fp = FALSE;
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];

                if (ainfo->storage == ArgOnStack) {
                        /*
                         * The stack offset can only be determined when the frame
                         * size is known.
                         */
                        cfg->arch.omit_fp = FALSE;
                }
        }

        locals_size = 0;
        for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                int ialign;

                locals_size += mono_type_size (ins->inst_vtype, &ialign);
        }
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
        GList *regs = NULL;

        mono_arch_compute_omit_fp (cfg);

        if (cfg->globalra) {
                if (cfg->arch.omit_fp)
                        regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

                regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
                regs = g_list_prepend (regs, (gpointer)AMD64_R12);
                regs = g_list_prepend (regs, (gpointer)AMD64_R13);
                regs = g_list_prepend (regs, (gpointer)AMD64_R14);
                regs = g_list_prepend (regs, (gpointer)AMD64_R15);

                regs = g_list_prepend (regs, (gpointer)AMD64_R10);
                regs = g_list_prepend (regs, (gpointer)AMD64_R9);
                regs = g_list_prepend (regs, (gpointer)AMD64_R8);
                regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RDX);
                regs = g_list_prepend (regs, (gpointer)AMD64_RCX);
                regs = g_list_prepend (regs, (gpointer)AMD64_RAX);
        } else {
                if (cfg->arch.omit_fp)
                        regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

                /* We use the callee saved registers for global allocation */
                regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
                regs = g_list_prepend (regs, (gpointer)AMD64_R12);
                regs = g_list_prepend (regs, (gpointer)AMD64_R13);
                regs = g_list_prepend (regs, (gpointer)AMD64_R14);
                regs = g_list_prepend (regs, (gpointer)AMD64_R15);
#ifdef HOST_WIN32
                regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
#endif
        }

        return regs;
}

GList*
mono_arch_get_global_fp_regs (MonoCompile *cfg)
{
        GList *regs = NULL;
        int i;

        /* All XMM registers */
        for (i = 0; i < 16; ++i)
                regs = g_list_prepend (regs, GINT_TO_POINTER (i));

        return regs;
}

GList*
mono_arch_get_iregs_clobbered_by_call (MonoCallInst *call)
{
        static GList *r = NULL;

        if (r == NULL) {
                GList *regs = NULL;

                regs = g_list_prepend (regs, (gpointer)AMD64_RBP);
                regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
                regs = g_list_prepend (regs, (gpointer)AMD64_R12);
                regs = g_list_prepend (regs, (gpointer)AMD64_R13);
                regs = g_list_prepend (regs, (gpointer)AMD64_R14);
                regs = g_list_prepend (regs, (gpointer)AMD64_R15);

                regs = g_list_prepend (regs, (gpointer)AMD64_R10);
                regs = g_list_prepend (regs, (gpointer)AMD64_R9);
                regs = g_list_prepend (regs, (gpointer)AMD64_R8);
                regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RDX);
                regs = g_list_prepend (regs, (gpointer)AMD64_RCX);
                regs = g_list_prepend (regs, (gpointer)AMD64_RAX);

                InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
        }

        return r;
}

GList*
mono_arch_get_fregs_clobbered_by_call (MonoCallInst *call)
{
        int i;
        static GList *r = NULL;

        if (r == NULL) {
                GList *regs = NULL;

                for (i = 0; i < AMD64_XMM_NREG; ++i)
                        regs = g_list_prepend (regs, GINT_TO_POINTER (MONO_MAX_IREGS + i));

                InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
        }

        return r;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
        MonoInst *ins = cfg->varinfo [vmv->idx];

        if (cfg->method->save_lmf)
                /* The register is already saved */
                /* subtract 1 for the invisible store in the prolog */
                return (ins->opcode == OP_ARG) ? 0 : 1;
        else
                /* push+pop */
                return (ins->opcode == OP_ARG) ? 1 : 2;
}

/*
 * mono_arch_fill_argument_info:
 *
 *   Populate cfg->args, cfg->ret and cfg->vret_addr with information about the arguments
 * of the method.
 */
void
mono_arch_fill_argument_info (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *ins;
        int i;
        CallInfo *cinfo;

        header = cfg->header;

        sig = mono_method_signature (cfg->method);

        cinfo = cfg->arch.cinfo;

        /*
         * Contrary to mono_arch_allocate_vars (), the information should describe
         * where the arguments are at the beginning of the method, not where they can be
         * accessed during the execution of the method. The latter makes no sense for the
         * global register allocator, since a variable can be in more than one location.
         */
        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
                                cfg->vret_addr->opcode = OP_REGVAR;
                                cfg->vret_addr->inst_c0 = cinfo->ret.reg;
                        }
                        else {
                                cfg->ret->opcode = OP_REGVAR;
                                cfg->ret->inst_c0 = cinfo->ret.reg;
                        }
                        break;
                case ArgValuetypeInReg:
                        cfg->ret->opcode = OP_REGOFFSET;
                        cfg->ret->inst_basereg = -1;
                        cfg->ret->inst_offset = -1;
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];
                MonoType *arg_type;

                ins = cfg->args [i];

                if (sig->hasthis && (i == 0))
                        arg_type = &mono_defaults.object_class->byval_arg;
                else
                        arg_type = sig->params [i - sig->hasthis];

                switch (ainfo->storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        ins->opcode = OP_REGVAR;
                        ins->inst_c0 = ainfo->reg;
                        break;
                case ArgOnStack:
                        ins->opcode = OP_REGOFFSET;
                        ins->inst_basereg = -1;
                        ins->inst_offset = -1;
                        break;
                case ArgValuetypeInReg:
                        /* Dummy */
                        ins->opcode = OP_NOP;
                        break;
                default:
                        g_assert_not_reached ();
                }
        }
}

void
mono_arch_allocate_vars (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *ins;
        int i, offset;
        guint32 locals_stack_size, locals_stack_align;
        gint32 *offsets;
        CallInfo *cinfo;

        header = cfg->header;

        sig = mono_method_signature (cfg->method);

        cinfo = cfg->arch.cinfo;

        mono_arch_compute_omit_fp (cfg);

        /*
         * We use the ABI calling conventions for managed code as well.
         * Exception: valuetypes are only sometimes passed or returned in registers.
         */

        /*
         * The stack looks like this:
         * <incoming arguments passed on the stack>
         * <return value>
         * <lmf/caller saved registers>
         * <locals>
         * <spill area>
         * <localloc area>  -> grows dynamically
         * <params area>
         */

        if (cfg->arch.omit_fp) {
                cfg->flags |= MONO_CFG_HAS_SPILLUP;
                cfg->frame_reg = AMD64_RSP;
                offset = 0;
        } else {
                /* Locals are allocated backwards from %fp */
                cfg->frame_reg = AMD64_RBP;
                offset = 0;
        }

        if (cfg->method->save_lmf) {
                /* Reserve stack space for saving LMF */
                if (cfg->arch.omit_fp) {
                        cfg->arch.lmf_offset = offset;
                        offset += sizeof (MonoLMF);
                }
                else {
                        offset += sizeof (MonoLMF);
                        cfg->arch.lmf_offset = -offset;
                }
        } else {
                if (cfg->arch.omit_fp)
                        cfg->arch.reg_save_area_offset = offset;
                /* Reserve space for caller saved registers */
                for (i = 0; i < AMD64_NREG; ++i)
                        if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
                                offset += sizeof (gpointer);
                        }
        }

        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
                                if (cfg->globalra) {
                                        cfg->vret_addr->opcode = OP_REGVAR;
                                        cfg->vret_addr->inst_c0 = cinfo->ret.reg;
                                } else {
                                        /* The register is volatile */
                                        cfg->vret_addr->opcode = OP_REGOFFSET;
                                        cfg->vret_addr->inst_basereg = cfg->frame_reg;
                                        if (cfg->arch.omit_fp) {
                                                cfg->vret_addr->inst_offset = offset;
                                                offset += 8;
                                        } else {
                                                offset += 8;
                                                cfg->vret_addr->inst_offset = -offset;
                                        }
                                        if (G_UNLIKELY (cfg->verbose_level > 1)) {
                                                printf ("vret_addr =");
                                                mono_print_ins (cfg->vret_addr);
                                        }
                                }
                        }
                        else {
                                cfg->ret->opcode = OP_REGVAR;
                                cfg->ret->inst_c0 = cinfo->ret.reg;
                        }
                        break;
                case ArgValuetypeInReg:
                        /* Allocate a local to hold the result, the epilog will copy it to the correct place */
                        cfg->ret->opcode = OP_REGOFFSET;
                        cfg->ret->inst_basereg = cfg->frame_reg;
                        if (cfg->arch.omit_fp) {
                                cfg->ret->inst_offset = offset;
                                offset += 16;
                        } else {
                                offset += 16;
                                cfg->ret->inst_offset = - offset;
                        }
                        break;
                default:
                        g_assert_not_reached ();
                }
                if (!cfg->globalra)
                        cfg->ret->dreg = cfg->ret->inst_c0;
        }

        /* Allocate locals */
        if (!cfg->globalra) {
                offsets = mono_allocate_stack_slots_full (cfg, cfg->arch.omit_fp ? FALSE: TRUE, &locals_stack_size, &locals_stack_align);
                if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
                        char *mname = mono_method_full_name (cfg->method, TRUE);
                        cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
                        cfg->exception_message = g_strdup_printf ("Method %s stack is too big.", mname);
                        g_free (mname);
                        return;
                }

                if (locals_stack_align) {
                        offset += (locals_stack_align - 1);
                        offset &= ~(locals_stack_align - 1);
                }
                if (cfg->arch.omit_fp) {
                        cfg->locals_min_stack_offset = offset;
                        cfg->locals_max_stack_offset = offset + locals_stack_size;
                } else {
                        cfg->locals_min_stack_offset = - (offset + locals_stack_size);
                        cfg->locals_max_stack_offset = - offset;
                }

                for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                        if (offsets [i] != -1) {
                                MonoInst *ins = cfg->varinfo [i];
                                ins->opcode = OP_REGOFFSET;
                                ins->inst_basereg = cfg->frame_reg;
                                if (cfg->arch.omit_fp)
                                        ins->inst_offset = (offset + offsets [i]);
                                else
                                        ins->inst_offset = - (offset + offsets [i]);
                                //printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
                        }
                }
                offset += locals_stack_size;
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
                g_assert (!cfg->arch.omit_fp);
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ins = cfg->args [i];
                if (ins->opcode != OP_REGVAR) {
                        ArgInfo *ainfo = &cinfo->args [i];
                        gboolean inreg = TRUE;
                        MonoType *arg_type;

                        if (sig->hasthis && (i == 0))
                                arg_type = &mono_defaults.object_class->byval_arg;
                        else
                                arg_type = sig->params [i - sig->hasthis];

                        if (cfg->globalra) {
                                /* The new allocator needs info about the original locations of the arguments */
                                switch (ainfo->storage) {
                                case ArgInIReg:
                                case ArgInFloatSSEReg:
                                case ArgInDoubleSSEReg:
                                        ins->opcode = OP_REGVAR;
                                        ins->inst_c0 = ainfo->reg;
                                        break;
                                case ArgOnStack:
                                        g_assert (!cfg->arch.omit_fp);
                                        ins->opcode = OP_REGOFFSET;
                                        ins->inst_basereg = cfg->frame_reg;
                                        ins->inst_offset = ainfo->offset + ARGS_OFFSET;
                                        break;
                                case ArgValuetypeInReg:
                                        ins->opcode = OP_REGOFFSET;
1543                                         ins->inst_basereg = cfg->frame_reg;
1544                                         /* These arguments are saved to the stack in the prolog */
1545                                         offset = ALIGN_TO (offset, sizeof (gpointer));
1546                                         if (cfg->arch.omit_fp) {
1547                                                 ins->inst_offset = offset;
1548                                                 offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
1549                                         } else {
1550                                                 offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
1551                                                 ins->inst_offset = - offset;
1552                                         }
1553                                         break;
1554                                 default:
1555                                         g_assert_not_reached ();
1556                                 }
1557
1558                                 continue;
1559                         }
1560
1561                         /* FIXME: Allocate volatile arguments to registers */
1562                         if (ins->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
1563                                 inreg = FALSE;
1564
1565                         /* 
1566                          * Under AMD64, all registers used to pass arguments to functions
1567                          * are volatile across calls.
1568                          * FIXME: Optimize this.
1569                          */
1570                         if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg))
1571                                 inreg = FALSE;
1572
1573                         ins->opcode = OP_REGOFFSET;
1574
1575                         switch (ainfo->storage) {
1576                         case ArgInIReg:
1577                         case ArgInFloatSSEReg:
1578                         case ArgInDoubleSSEReg:
1579                                 if (inreg) {
1580                                         ins->opcode = OP_REGVAR;
1581                                         ins->dreg = ainfo->reg;
1582                                 }
1583                                 break;
1584                         case ArgOnStack:
1585                                 g_assert (!cfg->arch.omit_fp);
1586                                 ins->opcode = OP_REGOFFSET;
1587                                 ins->inst_basereg = cfg->frame_reg;
1588                                 ins->inst_offset = ainfo->offset + ARGS_OFFSET;
1589                                 break;
1590                         case ArgValuetypeInReg:
1591                                 break;
1592                         case ArgValuetypeAddrInIReg: {
1593                                 MonoInst *indir;
1594                                 g_assert (!cfg->arch.omit_fp);
1595                                 
1596                                 MONO_INST_NEW (cfg, indir, 0);
1597                                 indir->opcode = OP_REGOFFSET;
1598                                 if (ainfo->pair_storage [0] == ArgInIReg) {
1599                                         indir->inst_basereg = cfg->frame_reg;
1600                                         offset = ALIGN_TO (offset, sizeof (gpointer));
1601                                         offset += (sizeof (gpointer));
1602                                         indir->inst_offset = - offset;
1603                                 }
1604                                 else {
1605                                         indir->inst_basereg = cfg->frame_reg;
1606                                         indir->inst_offset = ainfo->offset + ARGS_OFFSET;
1607                                 }
1608                                 
1609                                 ins->opcode = OP_VTARG_ADDR;
1610                                 ins->inst_left = indir;
1611                                 
1612                                 break;
1613                         }
1614                         default:
1615                                 NOT_IMPLEMENTED;
1616                         }
1617
1618                         if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg)) {
1619                                 ins->opcode = OP_REGOFFSET;
1620                                 ins->inst_basereg = cfg->frame_reg;
1621                                 /* These arguments are saved to the stack in the prolog */
1622                                 offset = ALIGN_TO (offset, sizeof (gpointer));
1623                                 if (cfg->arch.omit_fp) {
1624                                         ins->inst_offset = offset;
1625                                         offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
1626                                         // Arguments are not yet supported by the stack map creation code
1627                                         //cfg->locals_max_stack_offset = MAX (cfg->locals_max_stack_offset, offset);
1628                                 } else {
1629                                         offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
1630                                         ins->inst_offset = - offset;
1631                                         //cfg->locals_min_stack_offset = MIN (cfg->locals_min_stack_offset, offset);
1632                                 }
1633                         }
1634                 }
1635         }
1636
1637         cfg->stack_offset = offset;
1638 }
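
/*
 * Frame layout sketch for the allocation above (illustrative, not emitted
 * code): with arch.omit_fp the frame register is the stack pointer and the
 * offsets computed above are positive, so a slot lives at
 *
 *     [%rsp + offset]
 *
 * while without fp elimination the frame register is %rbp and the same slot
 * gets a negative offset, i.e.
 *
 *     [%rbp - offset]
 *
 * which is why every allocation above has the two omit_fp/non-omit_fp arms.
 */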
1639
1640 void
1641 mono_arch_create_vars (MonoCompile *cfg)
1642 {
1643         MonoMethodSignature *sig;
1644         CallInfo *cinfo;
1645
1646         sig = mono_method_signature (cfg->method);
1647
1648         if (!cfg->arch.cinfo)
1649                 cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
1650         cinfo = cfg->arch.cinfo;
1651
1652         if (cinfo->ret.storage == ArgValuetypeInReg)
1653                 cfg->ret_var_is_local = TRUE;
1654
1655         if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
1656                 cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
1657                 if (G_UNLIKELY (cfg->verbose_level > 1)) {
1658                         printf ("vret_addr = ");
1659                         mono_print_ins (cfg->vret_addr);
1660                 }
1661         }
1662
1663         if (cfg->gen_seq_points) {
1664                 MonoInst *ins;
1665
1666                 ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1667                 ins->flags |= MONO_INST_VOLATILE;
1668                 cfg->arch.ss_trigger_page_var = ins;
1669         }
1670
1671 #ifdef MONO_AMD64_NO_PUSHES
1672         /*
1673          * When this is set, we pass arguments on the stack by moves, and by allocating 
1674          * a bigger stack frame, instead of pushes.
1675          * Pushes complicate exception handling because the arguments on the stack have
1676          * to be popped each time a frame is unwound. They also make fp elimination
1677          * impossible.
1678          * FIXME: This doesn't work inside filter/finally clauses, since those execute
1679          * on a new frame which doesn't include a param area.
1680          */
1681         cfg->arch.no_pushes = TRUE;
1682 #endif
1683 }
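
/*
 * Illustration of the no_pushes convention chosen above (a sketch, not the
 * exact code emitted): with pushes an outgoing stack argument becomes
 *
 *     push %rax                ; %rsp changes at every argument
 *
 * while with no_pushes the param area is preallocated as part of the frame
 * and the same store becomes
 *
 *     mov %rax, 0x8(%rsp)      ; %rsp stays fixed across the call sequence
 *
 * keeping the unwind state constant inside the basic block and making fp
 * elimination possible.
 */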
1684
1685 static void
1686 add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *tree)
1687 {
1688         MonoInst *ins;
1689
1690         switch (storage) {
1691         case ArgInIReg:
1692                 MONO_INST_NEW (cfg, ins, OP_MOVE);
1693                 ins->dreg = mono_alloc_ireg (cfg);
1694                 ins->sreg1 = tree->dreg;
1695                 MONO_ADD_INS (cfg->cbb, ins);
1696                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
1697                 break;
1698         case ArgInFloatSSEReg:
1699                 MONO_INST_NEW (cfg, ins, OP_AMD64_SET_XMMREG_R4);
1700                 ins->dreg = mono_alloc_freg (cfg);
1701                 ins->sreg1 = tree->dreg;
1702                 MONO_ADD_INS (cfg->cbb, ins);
1703
1704                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
1705                 break;
1706         case ArgInDoubleSSEReg:
1707                 MONO_INST_NEW (cfg, ins, OP_FMOVE);
1708                 ins->dreg = mono_alloc_freg (cfg);
1709                 ins->sreg1 = tree->dreg;
1710                 MONO_ADD_INS (cfg->cbb, ins);
1711
1712                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
1713
1714                 break;
1715         default:
1716                 g_assert_not_reached ();
1717         }
1718 }
1719
1720 static int
1721 arg_storage_to_load_membase (ArgStorage storage)
1722 {
1723         switch (storage) {
1724         case ArgInIReg:
1725                 return OP_LOAD_MEMBASE;
1726         case ArgInDoubleSSEReg:
1727                 return OP_LOADR8_MEMBASE;
1728         case ArgInFloatSSEReg:
1729                 return OP_LOADR4_MEMBASE;
1730         default:
1731                 g_assert_not_reached ();
1732         }
1733
1734         return -1;
1735 }
1736
1737 static void
1738 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
1739 {
1740         MonoInst *arg;
1741         MonoMethodSignature *tmp_sig;
1742         MonoInst *sig_arg;
1743
1744         if (call->tail_call)
1745                 NOT_IMPLEMENTED;
1746
1747         /* FIXME: Add support for signature tokens to AOT */
1748         cfg->disable_aot = TRUE;
1749
1750         g_assert (cinfo->sig_cookie.storage == ArgOnStack);
1751                         
1752         /*
1753          * mono_ArgIterator_Setup assumes the signature cookie is 
1754          * passed first and all the arguments which were before it are
1755          * passed on the stack after the signature. So compensate by 
1756          * passing a different signature.
1757          */
1758         tmp_sig = mono_metadata_signature_dup (call->signature);
1759         tmp_sig->param_count -= call->signature->sentinelpos;
1760         tmp_sig->sentinelpos = 0;
1761         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
1762
1763         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
1764         sig_arg->dreg = mono_alloc_ireg (cfg);
1765         sig_arg->inst_p0 = tmp_sig;
1766         MONO_ADD_INS (cfg->cbb, sig_arg);
1767
1768         if (cfg->arch.no_pushes) {
1769                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, cinfo->sig_cookie.offset, sig_arg->dreg);
1770         } else {
1771                 MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
1772                 arg->sreg1 = sig_arg->dreg;
1773                 MONO_ADD_INS (cfg->cbb, arg);
1774         }
1775 }
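
/*
 * Worked example for the signature rewriting above (hypothetical method, for
 * illustration): for a vararg call such as
 *
 *     int foo (string fmt, __arglist (int, double))
 *
 * the call signature has param_count == 3 and sentinelpos == 1, so TMP_SIG
 * ends up with param_count == 2 and params == { int, double }, i.e. exactly
 * the implicit arguments, which is the layout mono_ArgIterator_Setup expects.
 */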
1776
1777 static inline LLVMArgStorage
1778 arg_storage_to_llvm_arg_storage (MonoCompile *cfg, ArgStorage storage)
1779 {
1780         switch (storage) {
1781         case ArgInIReg:
1782                 return LLVMArgInIReg;
1783         case ArgNone:
1784                 return LLVMArgNone;
1785         default:
1786                 g_assert_not_reached ();
1787                 return LLVMArgNone;
1788         }
1789 }
1790
1791 #ifdef ENABLE_LLVM
1792 LLVMCallInfo*
1793 mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
1794 {
1795         int i, n;
1796         CallInfo *cinfo;
1797         ArgInfo *ainfo;
1798         int j;
1799         LLVMCallInfo *linfo;
1800         MonoType *t;
1801
1802         n = sig->param_count + sig->hasthis;
1803
1804         cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);
1805
1806         linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
1807
1808         /*
1809  * LLVM always uses the native ABI while we use our own ABI; the
1810          * only difference is the handling of vtypes:
1811          * - we only pass/receive them in registers in some cases, and only 
1812          *   in 1 or 2 integer registers.
1813          */
1814         if (cinfo->ret.storage == ArgValuetypeInReg) {
1815                 if (sig->pinvoke) {
1816                         cfg->exception_message = g_strdup ("pinvoke + vtypes");
1817                         cfg->disable_llvm = TRUE;
1818                         return linfo;
1819                 }
1820
1821                 linfo->ret.storage = LLVMArgVtypeInReg;
1822                 for (j = 0; j < 2; ++j)
1823                         linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
1824         }
1825
1826         if (MONO_TYPE_ISSTRUCT (sig->ret) && cinfo->ret.storage == ArgInIReg) {
1827                 /* Vtype returned using a hidden argument */
1828                 linfo->ret.storage = LLVMArgVtypeRetAddr;
1829                 linfo->vret_arg_index = cinfo->vret_arg_index;
1830         }
1831
1832         for (i = 0; i < n; ++i) {
1833                 ainfo = cinfo->args + i;
1834
1835                 if (i >= sig->hasthis)
1836                         t = sig->params [i - sig->hasthis];
1837                 else
1838                         t = &mono_defaults.int_class->byval_arg;
1839
1840                 linfo->args [i].storage = LLVMArgNone;
1841
1842                 switch (ainfo->storage) {
1843                 case ArgInIReg:
1844                         linfo->args [i].storage = LLVMArgInIReg;
1845                         break;
1846                 case ArgInDoubleSSEReg:
1847                 case ArgInFloatSSEReg:
1848                         linfo->args [i].storage = LLVMArgInFPReg;
1849                         break;
1850                 case ArgOnStack:
1851                         if (MONO_TYPE_ISSTRUCT (t)) {
1852                                 linfo->args [i].storage = LLVMArgVtypeByVal;
1853                         } else {
1854                                 linfo->args [i].storage = LLVMArgInIReg;
1855                                 if (!t->byref) {
1856                                         if (t->type == MONO_TYPE_R4)
1857                                                 linfo->args [i].storage = LLVMArgInFPReg;
1858                                         else if (t->type == MONO_TYPE_R8)
1859                                                 linfo->args [i].storage = LLVMArgInFPReg;
1860                                 }
1861                         }
1862                         break;
1863                 case ArgValuetypeInReg:
1864                         if (sig->pinvoke) {
1865                                 cfg->exception_message = g_strdup ("pinvoke + vtypes");
1866                                 cfg->disable_llvm = TRUE;
1867                                 return linfo;
1868                         }
1869
1870                         linfo->args [i].storage = LLVMArgVtypeInReg;
1871                         for (j = 0; j < 2; ++j)
1872                                 linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
1873                         break;
1874                 default:
1875                         cfg->exception_message = g_strdup ("ainfo->storage");
1876                         cfg->disable_llvm = TRUE;
1877                         break;
1878                 }
1879         }
1880
1881         return linfo;
1882 }
1883 #endif
1884
1885 void
1886 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
1887 {
1888         MonoInst *arg, *in;
1889         MonoMethodSignature *sig;
1890         int i, n, stack_size;
1891         CallInfo *cinfo;
1892         ArgInfo *ainfo;
1893
1894         stack_size = 0;
1895
1896         sig = call->signature;
1897         n = sig->param_count + sig->hasthis;
1898
1899         cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);
1900
1901         if (COMPILE_LLVM (cfg)) {
1902                 /* We shouldn't be called in the llvm case */
1903                 cfg->disable_llvm = TRUE;
1904                 return;
1905         }
1906
1907         if (cinfo->need_stack_align) {
1908                 if (!cfg->arch.no_pushes)
1909                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
1910         }
1911
1912         /* 
1913          * Emit all arguments which are passed on the stack to prevent register
1914          * allocation problems.
1915          */
1916         if (cfg->arch.no_pushes) {
1917                 for (i = 0; i < n; ++i) {
1918                         MonoType *t;
1919                         ainfo = cinfo->args + i;
1920
1921                         in = call->args [i];
1922
1923                         if (sig->hasthis && i == 0)
1924                                 t = &mono_defaults.object_class->byval_arg;
1925                         else
1926                                 t = sig->params [i - sig->hasthis];
1927
1928                         if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call) {
1929                                 if (!t->byref) {
1930                                         if (t->type == MONO_TYPE_R4)
1931                                                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
1932                                         else if (t->type == MONO_TYPE_R8)
1933                                                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
1934                                         else
1935                                                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
1936                                 } else {
1937                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
1938                                 }
1939                         }
1940                 }
1941         }
1942
1943         /*
1944          * Emit all parameters passed in registers in forward (non-reversed) order for better readability
1945          * and to help the optimization in emit_prolog ().
1946          */
1947         for (i = 0; i < n; ++i) {
1948                 ainfo = cinfo->args + i;
1949
1950                 in = call->args [i];
1951
1952                 if (ainfo->storage == ArgInIReg)
1953                         add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
1954         }
1955
1956         for (i = n - 1; i >= 0; --i) {
1957                 ainfo = cinfo->args + i;
1958
1959                 in = call->args [i];
1960
1961                 switch (ainfo->storage) {
1962                 case ArgInIReg:
1963                         /* Already done */
1964                         break;
1965                 case ArgInFloatSSEReg:
1966                 case ArgInDoubleSSEReg:
1967                         add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
1968                         break;
1969                 case ArgOnStack:
1970                 case ArgValuetypeInReg:
1971                 case ArgValuetypeAddrInIReg:
1972                         if (ainfo->storage == ArgOnStack && call->tail_call) {
1973                                 MonoInst *call_inst = (MonoInst*)call;
1974                                 cfg->args [i]->flags |= MONO_INST_VOLATILE;
1975                                 EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
1976                         } else if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
1977                                 guint32 align;
1978                                 guint32 size;
1979
1980                                 if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
1981                                         size = sizeof (MonoTypedRef);
1982                                         align = sizeof (gpointer);
1983                                 }
1984                                 else {
1985                                         if (sig->pinvoke)
1986                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
1987                                         else {
1988                                                 /* 
1989                                                  * Other backends use mono_type_stack_size (), but that
1990                                                  * aligns the size to 8, which is larger than the size of
1991                                                  * the source, leading to reads of invalid memory if the
1992                                                  * source is at the end of address space.
1993                                                  */
1994                                                 size = mono_class_value_size (in->klass, &align);
1995                                         }
1996                                 }
1997                                 g_assert (in->klass);
1998
1999                                 if (ainfo->storage == ArgOnStack && size >= 10000) {
2000                                         /* Avoid asserts in emit_memcpy () */
2001                                         cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
2002                                         cfg->exception_message = g_strdup_printf ("Passing an argument of size '%d'.", size);
2003                                         /* Continue normally */
2004                                 }
2005
2006                                 if (size > 0) {
2007                                         MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
2008                                         arg->sreg1 = in->dreg;
2009                                         arg->klass = in->klass;
2010                                         arg->backend.size = size;
2011                                         arg->inst_p0 = call;
2012                                         arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
2013                                         memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
2014
2015                                         MONO_ADD_INS (cfg->cbb, arg);
2016                                 }
2017                         } else {
2018                                 if (cfg->arch.no_pushes) {
2019                                         /* Already done */
2020                                 } else {
2021                                         MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
2022                                         arg->sreg1 = in->dreg;
2023                                         if (!sig->params [i - sig->hasthis]->byref) {
2024                                                 if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
2025                                                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
2026                                                         arg->opcode = OP_STORER4_MEMBASE_REG;
2027                                                         arg->inst_destbasereg = X86_ESP;
2028                                                         arg->inst_offset = 0;
2029                                                 } else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
2030                                                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
2031                                                         arg->opcode = OP_STORER8_MEMBASE_REG;
2032                                                         arg->inst_destbasereg = X86_ESP;
2033                                                         arg->inst_offset = 0;
2034                                                 }
2035                                         }
2036                                         MONO_ADD_INS (cfg->cbb, arg);
2037                                 }
2038                         }
2039                         break;
2040                 default:
2041                         g_assert_not_reached ();
2042                 }
2043
2044                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos))
2045                         /* Emit the signature cookie just before the implicit arguments */
2046                         emit_sig_cookie (cfg, call, cinfo);
2047         }
2048
2049         /* Handle the case where there are no implicit arguments */
2050         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
2051                 emit_sig_cookie (cfg, call, cinfo);
2052
2053         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
2054                 MonoInst *vtarg;
2055
2056                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2057                         if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
2058                                 /*
2059                                  * Tell the JIT to use a more efficient calling convention: call using
2060                                  * OP_CALL, compute the result location after the call, and save the 
2061                                  * result there.
2062                                  */
2063                                 call->vret_in_reg = TRUE;
2064                                 /* 
2065                                  * Nullify the instruction computing the vret addr to enable 
2066                                  * future optimizations.
2067                                  */
2068                                 if (call->vret_var)
2069                                         NULLIFY_INS (call->vret_var);
2070                         } else {
2071                                 if (call->tail_call)
2072                                         NOT_IMPLEMENTED;
2073                                 /*
2074                                  * The valuetype is in RAX:RDX after the call and needs to be copied to
2075                                  * the stack. Push the address here, so the call instruction can
2076                                  * access it.
2077                                  */
2078                                 if (!cfg->arch.vret_addr_loc) {
2079                                         cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
2080                                         /* Prevent it from being register allocated or optimized away */
2081                                         ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
2082                                 }
2083
2084                                 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
2085                         }
2086                 }
2087                 else {
2088                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
2089                         vtarg->sreg1 = call->vret_var->dreg;
2090                         vtarg->dreg = mono_alloc_preg (cfg);
2091                         MONO_ADD_INS (cfg->cbb, vtarg);
2092
2093                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
2094                 }
2095         }
2096
2097 #ifdef HOST_WIN32
2098         if (call->inst.opcode != OP_JMP && call->inst.opcode != OP_TAILCALL) {
2099                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 0x20);
2100         }
2101 #endif
2102
2103         if (cfg->method->save_lmf) {
2104                 MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF);
2105                 MONO_ADD_INS (cfg->cbb, arg);
2106         }
2107
2108         call->stack_usage = cinfo->stack_usage;
2109 }
2110
2111 void
2112 mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
2113 {
2114         MonoInst *arg;
2115         MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
2116         ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
2117         int size = ins->backend.size;
2118
2119         if (ainfo->storage == ArgValuetypeInReg) {
2120                 MonoInst *load;
2121                 int part;
2122
2123                 for (part = 0; part < 2; ++part) {
2124                         if (ainfo->pair_storage [part] == ArgNone)
2125                                 continue;
2126
2127                         MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part]));
2128                         load->inst_basereg = src->dreg;
2129                         load->inst_offset = part * sizeof (gpointer);
2130
2131                         switch (ainfo->pair_storage [part]) {
2132                         case ArgInIReg:
2133                                 load->dreg = mono_alloc_ireg (cfg);
2134                                 break;
2135                         case ArgInDoubleSSEReg:
2136                         case ArgInFloatSSEReg:
2137                                 load->dreg = mono_alloc_freg (cfg);
2138                                 break;
2139                         default:
2140                                 g_assert_not_reached ();
2141                         }
2142                         MONO_ADD_INS (cfg->cbb, load);
2143
2144                         add_outarg_reg (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load);
2145                 }
2146         } else if (ainfo->storage == ArgValuetypeAddrInIReg) {
2147                 MonoInst *vtaddr, *load;
2148                 vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
2149                 
2150                 g_assert (!cfg->arch.no_pushes);
2151
2152                 MONO_INST_NEW (cfg, load, OP_LDADDR);
2153                 load->inst_p0 = vtaddr;
2154                 vtaddr->flags |= MONO_INST_INDIRECT;
2155                 load->type = STACK_MP;
2156                 load->klass = vtaddr->klass;
2157                 load->dreg = mono_alloc_ireg (cfg);
2158                 MONO_ADD_INS (cfg->cbb, load);
2159                 mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 4);
2160
2161                 if (ainfo->pair_storage [0] == ArgInIReg) {
2162                         MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE);
2163                         arg->dreg = mono_alloc_ireg (cfg);
2164                         arg->sreg1 = load->dreg;
2165                         arg->inst_imm = 0;
2166                         MONO_ADD_INS (cfg->cbb, arg);
2167                         mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE);
2168                 } else {
2169                         MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
2170                         arg->sreg1 = load->dreg;
2171                         MONO_ADD_INS (cfg->cbb, arg);
2172                 }
2173         } else {
2174                 if (size == 8) {
2175                         if (cfg->arch.no_pushes) {
2176                                 int dreg = mono_alloc_ireg (cfg);
2177
2178                                 MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
2179                                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, dreg);
2180                         } else {
2181                                 /* Can't use this for < 8 since it does an 8 byte memory load */
2182                                 MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
2183                                 arg->inst_basereg = src->dreg;
2184                                 arg->inst_offset = 0;
2185                                 MONO_ADD_INS (cfg->cbb, arg);
2186                         }
2187                 } else if (size <= 40) {
2188                         if (cfg->arch.no_pushes) {
2189                                 mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
2190                         } else {
2191                                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 8));
2192                                 mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
2193                         }
2194                 } else {
2195                         if (cfg->arch.no_pushes) {
2196                                 // FIXME: Code growth
2197                                 mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
2198                         } else {
2199                                 MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
2200                                 arg->inst_basereg = src->dreg;
2201                                 arg->inst_offset = 0;
2202                                 arg->inst_imm = size;
2203                                 MONO_ADD_INS (cfg->cbb, arg);
2204                         }
2205                 }
2206         }
2207 }
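
/*
 * Worked example (assumed vtype, for illustration): a 16 byte struct such as
 *
 *     struct { gint64 a; gint64 b; };
 *
 * passed as ArgValuetypeInReg with pair_storage == { ArgInIReg, ArgInIReg }
 * takes the first branch above: two OP_LOAD_MEMBASE loads at offsets 0 and 8
 * from SRC, each forwarded to its argument register by add_outarg_reg ().
 */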
2208
2209 void
2210 mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
2211 {
2212         MonoType *ret = mini_type_get_underlying_type (NULL, mono_method_signature (method)->ret);
2213
2214         if (ret->type == MONO_TYPE_R4) {
2215                 if (COMPILE_LLVM (cfg))
2216                         MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
2217                 else
2218                         MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
2219                 return;
2220         } else if (ret->type == MONO_TYPE_R8) {
2221                 MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
2222                 return;
2223         }
2224                         
2225         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
2226 }
2227
2228 #endif /* DISABLE_JIT */
2229
2230 #define EMIT_COND_BRANCH(ins,cond,sign) \
2231         if (ins->inst_true_bb->native_offset) { \
2232                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
2233         } else { \
2234                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
2235                 if ((cfg->opt & MONO_OPT_BRANCH) && \
2236                     x86_is_imm8 (ins->inst_true_bb->max_offset - offset)) \
2237                         x86_branch8 (code, cond, 0, sign); \
2238                 else \
2239                         x86_branch32 (code, cond, 0, sign); \
2240 }
2241
2242 typedef struct {
2243         MonoMethodSignature *sig;
2244         CallInfo *cinfo;
2245 } ArchDynCallInfo;
2246
2247 typedef struct {
2248         mgreg_t regs [PARAM_REGS];
2249         mgreg_t res;
2250         guint8 *ret;
2251 } DynCallArgs;
2252
2253 static gboolean
2254 dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo)
2255 {
2256         int i;
2257
2258 #ifdef HOST_WIN32
2259         return FALSE;
2260 #endif
2261
2262         switch (cinfo->ret.storage) {
2263         case ArgNone:
2264         case ArgInIReg:
2265                 break;
2266         case ArgValuetypeInReg: {
2267                 ArgInfo *ainfo = &cinfo->ret;
2268
2269                 if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
2270                         return FALSE;
2271                 if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
2272                         return FALSE;
2273                 break;
2274         }
2275         default:
2276                 return FALSE;
2277         }
2278
2279         for (i = 0; i < cinfo->nargs; ++i) {
2280                 ArgInfo *ainfo = &cinfo->args [i];
2281                 switch (ainfo->storage) {
2282                 case ArgInIReg:
2283                         break;
2284                 case ArgValuetypeInReg:
2285                         if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
2286                                 return FALSE;
2287                         if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
2288                                 return FALSE;
2289                         break;
2290                 default:
2291                         return FALSE;
2292                 }
2293         }
2294
2295         return TRUE;
2296 }
2297
2298 /*
2299  * mono_arch_dyn_call_prepare:
2300  *
2301  *   Return a pointer to an arch-specific structure which contains information 
2302  * needed by mono_arch_start_dyn_call (). Return NULL if OP_DYN_CALL is not
2303  * supported for SIG.
2304  * This function is equivalent to ffi_prep_cif in libffi.
2305  */
2306 MonoDynCallInfo*
2307 mono_arch_dyn_call_prepare (MonoMethodSignature *sig)
2308 {
2309         ArchDynCallInfo *info;
2310         CallInfo *cinfo;
2311
2312         cinfo = get_call_info (NULL, NULL, sig, FALSE);
2313
2314         if (!dyn_call_supported (sig, cinfo)) {
2315                 g_free (cinfo);
2316                 return NULL;
2317         }
2318
2319         info = g_new0 (ArchDynCallInfo, 1);
2320         // FIXME: Preprocess the info to speed up get_dyn_call_args ().
2321         info->sig = sig;
2322         info->cinfo = cinfo;
2323         
2324         return (MonoDynCallInfo*)info;
2325 }
2326
2327 /*
2328  * mono_arch_dyn_call_free:
2329  *
2330  *   Free a MonoDynCallInfo structure.
2331  */
2332 void
2333 mono_arch_dyn_call_free (MonoDynCallInfo *info)
2334 {
2335         ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
2336
2337         g_free (ainfo->cinfo);
2338         g_free (ainfo);
2339 }
2340
2341 /*
2342  * mono_arch_get_start_dyn_call:
2343  *
2344  *   Convert the arguments ARGS to a format which can be passed to OP_DYN_CALL, and
2345  * store the result into BUF.
2346  * ARGS should be an array of pointers pointing to the arguments.
2347  * RET should point to a memory buffer large enough to hold the result of the
2348  * call.
2349  * This function should be as fast as possible; any work which does not depend
2350  * on the actual values of the arguments should be done in 
2351  * mono_arch_dyn_call_prepare ().
2352  * start_dyn_call + OP_DYN_CALL + finish_dyn_call is equivalent to ffi_call in
2353  * libffi.
2354  */
2355 void
2356 mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len)
2357 {
2358         ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
2359         DynCallArgs *p = (DynCallArgs*)buf;
2360         int arg_index, greg, i;
2361         MonoMethodSignature *sig = dinfo->sig;
2362
2363         g_assert (buf_len >= sizeof (DynCallArgs));
2364
2365         p->res = 0;
2366         p->ret = ret;
2367
2368         arg_index = 0;
2369         greg = 0;
2370
2371         if (dinfo->cinfo->vtype_retaddr)
2372                 p->regs [greg ++] = (mgreg_t)ret;
2373
2374         if (sig->hasthis) {
2375                 p->regs [greg ++] = (mgreg_t)*(args [arg_index ++]);
2376         }
2377
2378         for (i = 0; i < sig->param_count; i++) {
2379                 MonoType *t = mono_type_get_underlying_type (sig->params [i]);
2380                 gpointer *arg = args [arg_index ++];
2381
2382                 if (t->byref) {
2383                         p->regs [greg ++] = (mgreg_t)*(arg);
2384                         continue;
2385                 }
2386
2387                 switch (t->type) {
2388                 case MONO_TYPE_STRING:
2389                 case MONO_TYPE_CLASS:  
2390                 case MONO_TYPE_ARRAY:
2391                 case MONO_TYPE_SZARRAY:
2392                 case MONO_TYPE_OBJECT:
2393                 case MONO_TYPE_PTR:
2394                 case MONO_TYPE_I:
2395                 case MONO_TYPE_U:
2396                 case MONO_TYPE_I8:
2397                 case MONO_TYPE_U8:
2398                         g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]);
2399                         p->regs [greg ++] = (mgreg_t)*(arg);
2400                         break;
2401                 case MONO_TYPE_BOOLEAN:
2402                 case MONO_TYPE_U1:
2403                         p->regs [greg ++] = *(guint8*)(arg);
2404                         break;
2405                 case MONO_TYPE_I1:
2406                         p->regs [greg ++] = *(gint8*)(arg);
2407                         break;
2408                 case MONO_TYPE_I2:
2409                         p->regs [greg ++] = *(gint16*)(arg);
2410                         break;
2411                 case MONO_TYPE_U2:
2412                 case MONO_TYPE_CHAR:
2413                         p->regs [greg ++] = *(guint16*)(arg);
2414                         break;
2415                 case MONO_TYPE_I4:
2416                         p->regs [greg ++] = *(gint32*)(arg);
2417                         break;
2418                 case MONO_TYPE_U4:
2419                         p->regs [greg ++] = *(guint32*)(arg);
2420                         break;
2421                 case MONO_TYPE_GENERICINST:
2422                         if (MONO_TYPE_IS_REFERENCE (t)) {
2423                                 p->regs [greg ++] = (mgreg_t)*(arg);
2424                                 break;
2425                         } else {
2426                                 /* Fall through */
2427                         }
2428                 case MONO_TYPE_VALUETYPE: {
2429                         ArgInfo *ainfo = &dinfo->cinfo->args [i + sig->hasthis];
2430
2431                         g_assert (ainfo->storage == ArgValuetypeInReg);
2432                         if (ainfo->pair_storage [0] != ArgNone) {
2433                                 g_assert (ainfo->pair_storage [0] == ArgInIReg);
2434                                 p->regs [greg ++] = ((mgreg_t*)(arg))[0];
2435                         }
2436                         if (ainfo->pair_storage [1] != ArgNone) {
2437                                 g_assert (ainfo->pair_storage [1] == ArgInIReg);
2438                                 p->regs [greg ++] = ((mgreg_t*)(arg))[1];
2439                         }
2440                         break;
2441                 }
2442                 default:
2443                         g_assert_not_reached ();
2444                 }
2445         }
2446
2447         g_assert (greg <= PARAM_REGS);
2448 }
2449
2450 /*
2451  * mono_arch_finish_dyn_call:
2452  *
2453  *   Store the result of a dyn call into the return value buffer passed to
2454  * start_dyn_call ().
2455  * This function should be as fast as possible; any work which does not depend
2456  * on the actual values of the arguments should be done in 
2457  * mono_arch_dyn_call_prepare ().
2458  */
2459 void
2460 mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
2461 {
2462         ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
2463         MonoMethodSignature *sig = dinfo->sig;
2464         guint8 *ret = ((DynCallArgs*)buf)->ret;
2465         mgreg_t res = ((DynCallArgs*)buf)->res;
2466
2467         switch (mono_type_get_underlying_type (sig->ret)->type) {
2468         case MONO_TYPE_VOID:
2469                 *(gpointer*)ret = NULL;
2470                 break;
2471         case MONO_TYPE_STRING:
2472         case MONO_TYPE_CLASS:  
2473         case MONO_TYPE_ARRAY:
2474         case MONO_TYPE_SZARRAY:
2475         case MONO_TYPE_OBJECT:
2476         case MONO_TYPE_I:
2477         case MONO_TYPE_U:
2478         case MONO_TYPE_PTR:
2479                 *(gpointer*)ret = (gpointer)res;
2480                 break;
2481         case MONO_TYPE_I1:
2482                 *(gint8*)ret = res;
2483                 break;
2484         case MONO_TYPE_U1:
2485         case MONO_TYPE_BOOLEAN:
2486                 *(guint8*)ret = res;
2487                 break;
2488         case MONO_TYPE_I2:
2489                 *(gint16*)ret = res;
2490                 break;
2491         case MONO_TYPE_U2:
2492         case MONO_TYPE_CHAR:
2493                 *(guint16*)ret = res;
2494                 break;
2495         case MONO_TYPE_I4:
2496                 *(gint32*)ret = res;
2497                 break;
2498         case MONO_TYPE_U4:
2499                 *(guint32*)ret = res;
2500                 break;
2501         case MONO_TYPE_I8:
2502                 *(gint64*)ret = res;
2503                 break;
2504         case MONO_TYPE_U8:
2505                 *(guint64*)ret = res;
2506                 break;
2507         case MONO_TYPE_GENERICINST:
2508                 if (MONO_TYPE_IS_REFERENCE (sig->ret)) {
2509                         *(gpointer*)ret = (gpointer)res;
2510                         break;
2511                 } else {
2512                         /* Fall through */
2513                 }
2514         case MONO_TYPE_VALUETYPE:
2515                 if (dinfo->cinfo->vtype_retaddr) {
2516                         /* Nothing to do */
2517                 } else {
2518                         ArgInfo *ainfo = &dinfo->cinfo->ret;
2519
2520                         g_assert (ainfo->storage == ArgValuetypeInReg);
2521
2522                         if (ainfo->pair_storage [0] != ArgNone) {
2523                                 g_assert (ainfo->pair_storage [0] == ArgInIReg);
2524                                 ((mgreg_t*)ret)[0] = res;
2525                         }
2526
2527                         g_assert (ainfo->pair_storage [1] == ArgNone);
2528                 }
2529                 break;
2530         default:
2531                 g_assert_not_reached ();
2532         }
2533 }
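
/*
 * A minimal usage sketch of the dyn call API above (illustrative and compiled
 * out; the real caller is the OP_DYN_CALL path in the runtime, and the helper
 * name below is hypothetical):
 */
#if 0
static void
example_dyn_call (MonoMethodSignature *sig, gpointer **args, guint8 *ret)
{
        MonoDynCallInfo *info = mono_arch_dyn_call_prepare (sig);
        DynCallArgs buf;

        if (!info)
                return; /* OP_DYN_CALL is not supported for SIG */

        /* Convert ARGS into the register file image inside BUF */
        mono_arch_start_dyn_call (info, args, ret, (guint8*)&buf, sizeof (buf));
        /* ... the JIT-generated OP_DYN_CALL opcode executes here, setting buf.res ... */
        mono_arch_finish_dyn_call (info, (guint8*)&buf);
        mono_arch_dyn_call_free (info);
}
#endif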
2534
2535 /* emit an exception if the condition fails */
2536 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
2537         do {                                                        \
2538                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
2539                 if (tins == NULL) {                                                                             \
2540                         mono_add_patch_info (cfg, code - cfg->native_code,   \
2541                                         MONO_PATCH_INFO_EXC, exc_name);  \
2542                         x86_branch32 (code, cond, 0, signed);               \
2543                 } else {        \
2544                         EMIT_COND_BRANCH (tins, cond, signed);  \
2545                 }                       \
2546         } while (0)
2547
2548 #define EMIT_FPCOMPARE(code) do { \
2549         amd64_fcompp (code); \
2550         amd64_fnstsw (code); \
2551 } while (0)
2552
2553 #define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
2554         amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
2555         amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
2556         amd64_ ##op (code); \
2557         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
2558         amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
2559 } while (0)
2560
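/*
 * Usage sketch for the macro above (the actual call sites are in the code
 * emitter further down): an FP op with no SSE2 encoding, e.g. sine, goes
 * through the x87 stack as
 *
 *     EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
 *
 * i.e. spill the xmm value into the red zone, fld it, run the x87
 * instruction, fst the result and reload it into the destination xmm
 * register.
 */
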
2561 static guint8*
2562 emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
2563 {
2564         gboolean no_patch = FALSE;
2565
2566         /* 
2567          * FIXME: Add support for thunks
2568          */
2569         {
2570                 gboolean near_call = FALSE;
2571
2572                 /*
2573                  * Indirect calls are expensive so try to make a near call if possible.
2574                  * The caller memory is allocated by the code manager so it is 
2575                  * guaranteed to be at a 32 bit offset.
2576                  */
2577
2578                 if (patch_type != MONO_PATCH_INFO_ABS) {
2579                         /* The target is in memory allocated using the code manager */
2580                         near_call = TRUE;
2581
2582                         if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
2583                                 if (((MonoMethod*)data)->klass->image->aot_module)
2584                                         /* The callee might be an AOT method */
2585                                         near_call = FALSE;
2586                                 if (((MonoMethod*)data)->dynamic)
2587                                         /* The target is in malloc-ed memory */
2588                                         near_call = FALSE;
2589                         }
2590
2591                         if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
2592                                 /* 
2593                                  * The call might go directly to a native function without
2594                                  * the wrapper.
2595                                  */
2596                                 MonoJitICallInfo *mi = mono_find_jit_icall_by_name (data);
2597                                 if (mi) {
2598                                         gconstpointer target = mono_icall_get_wrapper (mi);
2599                                         if ((((guint64)target) >> 32) != 0)
2600                                                 near_call = FALSE;
2601                                 }
2602                         }
2603                 }
2604                 else {
2605                         if (cfg->abs_patches && g_hash_table_lookup (cfg->abs_patches, data)) {
2606                                 /* 
2607                                  * This is not really an optimization, but required because the
2608                                  * generic class init trampolines use R11 to pass the vtable.
2609                                  */
2610                                 near_call = TRUE;
2611                         } else {
2612                                 MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
2613                                 if (info) {
2614                                         if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && 
2615                                                 strstr (cfg->method->name, info->name)) {
2616                                                 /* A call to the wrapped function */
2617                                                 if ((((guint64)data) >> 32) == 0)
2618                                                         near_call = TRUE;
2619                                                 no_patch = TRUE;
2620                                         }
2621                                         else if (info->func == info->wrapper) {
2622                                                 /* No wrapper */
2623                                                 if ((((guint64)info->func) >> 32) == 0)
2624                                                         near_call = TRUE;
2625                                         }
2626                                         else {
2627                                                 /* See the comment in mono_codegen () */
2628                                                 if ((info->name [0] != 'v') || (strstr (info->name, "ves_array_new_va_") == NULL && strstr (info->name, "ves_array_element_address_") == NULL))
2629                                                         near_call = TRUE;
2630                                         }
2631                                 }
2632                                 else if ((((guint64)data) >> 32) == 0) {
2633                                         near_call = TRUE;
2634                                         no_patch = TRUE;
2635                                 }
2636                         }
2637                 }
2638
2639                 if (cfg->method->dynamic)
2640                         /* These methods are allocated using malloc */
2641                         near_call = FALSE;
2642
2643 #ifdef MONO_ARCH_NOMAP32BIT
2644                 near_call = FALSE;
2645 #endif
2646
2647                 /* The 64bit XEN kernel does not honour the MAP_32BIT flag. (#522894) */
2648                 if (optimize_for_xen)
2649                         near_call = FALSE;
2650
2651                 if (cfg->compile_aot) {
2652                         near_call = TRUE;
2653                         no_patch = TRUE;
2654                 }
2655
2656                 if (near_call) {
2657                         /* 
2658                          * Align the call displacement to an address divisible by 4 so it does
2659                          * not span cache lines. This is required for code patching to work on SMP
2660                          * systems.
2661                          */
2662                         if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0)
2663                                 amd64_padding (code, 4 - ((guint32)(code + 1 - cfg->native_code) % 4));
2664                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
2665                         amd64_call_code (code, 0);
2666                 }
2667                 else {
2668                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
2669                         amd64_set_reg_template (code, GP_SCRATCH_REG);
2670                         amd64_call_reg (code, GP_SCRATCH_REG);
2671                 }
2672         }
2673
2674         return code;
2675 }
2676
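/*
 * emit_call:
 *
 *   Emit a call to DATA. On Win64, win64_adjust_stack temporarily reserves the
 * 32 byte register home area which the calling convention requires the caller
 * to provide for the callee.
 */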
2677 static inline guint8*
2678 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data, gboolean win64_adjust_stack)
2679 {
2680 #ifdef HOST_WIN32
2681         if (win64_adjust_stack)
2682                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 32);
2683 #endif
2684         code = emit_call_body (cfg, code, patch_type, data);
2685 #ifdef HOST_WIN32
2686         if (win64_adjust_stack)
2687                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 32);
2688 #endif  
2689         
2690         return code;
2691 }
2692
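/*
 * Map a STORE*_MEMBASE_IMM opcode to its _MEMBASE_REG counterpart, or return -1
 * if there is none.
 */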
2693 static inline int
2694 store_membase_imm_to_store_membase_reg (int opcode)
2695 {
2696         switch (opcode) {
2697         case OP_STORE_MEMBASE_IMM:
2698                 return OP_STORE_MEMBASE_REG;
2699         case OP_STOREI4_MEMBASE_IMM:
2700                 return OP_STOREI4_MEMBASE_REG;
2701         case OP_STOREI8_MEMBASE_IMM:
2702                 return OP_STOREI8_MEMBASE_REG;
2703         }
2704
2705         return -1;
2706 }
2707
2708 #ifndef DISABLE_JIT
2709
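/* TRUE unless the opcode consumes the carry flag, i.e. unless it is one of the ADC/SBB variants */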
2710 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB) || ((opcode) == OP_ISBB_IMM)))
2711
2712 /*
2713  * mono_arch_peephole_pass_1:
2714  *
2715  *   Perform peephole opts which should/can be performed before local regalloc
2716  */
2717 void
2718 mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
2719 {
2720         MonoInst *ins, *n;
2721
2722         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
2723                 MonoInst *last_ins = ins->prev;
2724
2725                 switch (ins->opcode) {
2726                 case OP_ADD_IMM:
2727                 case OP_IADD_IMM:
2728                 case OP_LADD_IMM:
2729                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS) && (ins->inst_imm > 0)) {
2730                                 /* 
2731                                  * X86_LEA is like ADD, but doesn't have the
2732                                  * sreg1 == dreg restriction. inst_imm > 0 is needed since LEA sign-extends 
2733                                  * its operand to 64 bits.
2734                                  */
2735                                 ins->opcode = OP_X86_LEA_MEMBASE;
2736                                 ins->inst_basereg = ins->sreg1;
2737                         }
2738                         break;
2739                 case OP_LXOR:
2740                 case OP_IXOR:
2741                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
2742                                 MonoInst *ins2;
2743
2744                                 /* 
2745                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
2746                                  * the latter has length 2-3 instead of 6 (reverse constant
2747                                  * propagation). These instruction sequences are very common
2748                                  * in the initlocals bblock.
2749                                  */
2750                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
2751                                         if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
2752                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
2753                                                 ins2->sreg1 = ins->dreg;
2754                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
2755                                                 /* Continue */
2756                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
2757                                                 NULLIFY_INS (ins2);
2758                                                 /* Continue */
2759                                         } else {
2760                                                 break;
2761                                         }
2762                                 }
2763                         }
2764                         break;
2765                 case OP_COMPARE_IMM:
2766                 case OP_LCOMPARE_IMM:
2767                         /* OP_COMPARE_IMM (reg, 0) 
2768                          * --> 
2769                          * OP_AMD64_TEST_NULL (reg) 
2770                          */
2771                         if (!ins->inst_imm)
2772                                 ins->opcode = OP_AMD64_TEST_NULL;
2773                         break;
2774                 case OP_ICOMPARE_IMM:
2775                         if (!ins->inst_imm)
2776                                 ins->opcode = OP_X86_TEST_NULL;
2777                         break;
2778                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
2779                         /* 
2780                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
2781                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
2782                          * -->
2783                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
2784                          * OP_COMPARE_IMM reg, imm
2785                          *
2786                          * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
2787                          */
2788                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
2789                             ins->inst_basereg == last_ins->inst_destbasereg &&
2790                             ins->inst_offset == last_ins->inst_offset) {
2791                                         ins->opcode = OP_ICOMPARE_IMM;
2792                                         ins->sreg1 = last_ins->sreg1;
2793
2794                                         /* check if we can remove cmp reg,0 with test null */
2795                                         if (!ins->inst_imm)
2796                                                 ins->opcode = OP_X86_TEST_NULL;
2797                                 }
2798
2799                         break;
2800                 }
2801
2802                 mono_peephole_ins (bb, ins);
2803         }
2804 }
2805
2806 void
2807 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
2808 {
2809         MonoInst *ins, *n;
2810
2811         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
2812                 switch (ins->opcode) {
2813                 case OP_ICONST:
2814                 case OP_I8CONST: {
2815                         /* reg = 0 -> XOR (reg, reg) */
2816                         /* XOR sets cflags on x86, so we can't always do it */
2817                         if (ins->inst_c0 == 0 && (!ins->next || INST_IGNORES_CFLAGS (ins->next->opcode))) {
2818                                 ins->opcode = OP_LXOR;
2819                                 ins->sreg1 = ins->dreg;
2820                                 ins->sreg2 = ins->dreg;
2821                                 /* Fall through */
2822                         } else {
2823                                 break;
2824                         }
2825                 }
2826                 case OP_LXOR:
2827                         /*
2828                          * Use IXOR to avoid a rex prefix if possible. The cpu will zero extend the
2829                          * 0 result into 64 bits.
2830                          */
2831                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
2832                                 ins->opcode = OP_IXOR;
2833                         }
2834                         /* Fall through */
2835                 case OP_IXOR:
2836                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
2837                                 MonoInst *ins2;
2838
2839                                 /* 
2840                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
2841                                  * the latter has length 2-3 instead of 6 (reverse constant
2842                                  * propagation). These instruction sequences are very common
2843                                  * in the initlocals bblock.
2844                                  */
2845                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
2846                                         if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
2847                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
2848                                                 ins2->sreg1 = ins->dreg;
2849                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG) || (ins2->opcode == OP_LIVERANGE_START)) {
2850                                                 /* Continue */
2851                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
2852                                                 NULLIFY_INS (ins2);
2853                                                 /* Continue */
2854                                         } else {
2855                                                 break;
2856                                         }
2857                                 }
2858                         }
2859                         break;
2860                 case OP_IADD_IMM:
2861                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
2862                                 ins->opcode = OP_X86_INC_REG;
2863                         break;
2864                 case OP_ISUB_IMM:
2865                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
2866                                 ins->opcode = OP_X86_DEC_REG;
2867                         break;
2868                 }
2869
2870                 mono_peephole_ins (bb, ins);
2871         }
2872 }
2873
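/*
 * Insert a new instruction DEST with opcode OP before INS, copying its cil_code
 * so debug line number information stays correct.
 */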
2874 #define NEW_INS(cfg,ins,dest,op) do {   \
2875                 MONO_INST_NEW ((cfg), (dest), (op)); \
2876         (dest)->cil_code = (ins)->cil_code; \
2877         mono_bblock_insert_before_ins (bb, ins, (dest)); \
2878         } while (0)
2879
2880 /*
2881  * mono_arch_lowering_pass:
2882  *
2883  *  Converts complex opcodes into simpler ones so that each IR instruction
2884  * corresponds to one machine instruction.
2885  */
2886 void
2887 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
2888 {
2889         MonoInst *ins, *n, *temp;
2890
2891         /*
2892          * FIXME: Need to add more instructions, but the current machine 
2893          * description can't model some parts of the composite instructions like
2894          * cdq.
2895          */
2896         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
2897                 switch (ins->opcode) {
2898                 case OP_DIV_IMM:
2899                 case OP_REM_IMM:
2900                 case OP_IDIV_IMM:
2901                 case OP_IDIV_UN_IMM:
2902                 case OP_IREM_UN_IMM:
2903                         mono_decompose_op_imm (cfg, bb, ins);
2904                         break;
2905                 case OP_IREM_IMM:
2906                         /* Keep the opcode if we can implement it efficiently */
2907                         if (!((ins->inst_imm > 0) && (mono_is_power_of_two (ins->inst_imm) != -1)))
2908                                 mono_decompose_op_imm (cfg, bb, ins);
2909                         break;
2910                 case OP_COMPARE_IMM:
2911                 case OP_LCOMPARE_IMM:
2912                         if (!amd64_is_imm32 (ins->inst_imm)) {
2913                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
2914                                 temp->inst_c0 = ins->inst_imm;
2915                                 temp->dreg = mono_alloc_ireg (cfg);
2916                                 ins->opcode = OP_COMPARE;
2917                                 ins->sreg2 = temp->dreg;
2918                         }
2919                         break;
2920                 case OP_LOAD_MEMBASE:
2921                 case OP_LOADI8_MEMBASE:
2922                         if (!amd64_is_imm32 (ins->inst_offset)) {
2923                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
2924                                 temp->inst_c0 = ins->inst_offset;
2925                                 temp->dreg = mono_alloc_ireg (cfg);
2926                                 ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
2927                                 ins->inst_indexreg = temp->dreg;
2928                         }
2929                         break;
2930                 case OP_STORE_MEMBASE_IMM:
2931                 case OP_STOREI8_MEMBASE_IMM:
2932                         if (!amd64_is_imm32 (ins->inst_imm)) {
2933                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
2934                                 temp->inst_c0 = ins->inst_imm;
2935                                 temp->dreg = mono_alloc_ireg (cfg);
2936                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
2937                                 ins->sreg1 = temp->dreg;
2938                         }
2939                         break;
2940 #ifdef MONO_ARCH_SIMD_INTRINSICS
2941                 case OP_EXPAND_I1: {
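                                /*
                                 * There is no way to expand a single byte directly, so zero
                                 * extend it, replicate it into both halves of a 16 bit word
                                 * with a shift-by-8 and an OR, then expand it as an I2.
                                 */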
2942                                 int temp_reg1 = mono_alloc_ireg (cfg);
2943                                 int temp_reg2 = mono_alloc_ireg (cfg);
2944                                 int original_reg = ins->sreg1;
2945
2946                                 NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
2947                                 temp->sreg1 = original_reg;
2948                                 temp->dreg = temp_reg1;
2949
2950                                 NEW_INS (cfg, ins, temp, OP_SHL_IMM);
2951                                 temp->sreg1 = temp_reg1;
2952                                 temp->dreg = temp_reg2;
2953                                 temp->inst_imm = 8;
2954
2955                                 NEW_INS (cfg, ins, temp, OP_LOR);
2956                                 temp->sreg1 = temp->dreg = temp_reg2;
2957                                 temp->sreg2 = temp_reg1;
2958
2959                                 ins->opcode = OP_EXPAND_I2;
2960                                 ins->sreg1 = temp_reg2;
2961                         }
2962                         break;
2963 #endif
2964                 default:
2965                         break;
2966                 }
2967         }
2968
2969         bb->max_vreg = cfg->next_vreg;
2970 }
2971
2972 static const int 
2973 branch_cc_table [] = {
2974         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
2975         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
2976         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
2977 };
2978
2979 /* Maps CMP_... constants to X86_CC_... constants */
2980 static const int
2981 cc_table [] = {
2982         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
2983         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
2984 };
2985
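/* cc_signed_table [i] is TRUE when cc_table [i] stands for a signed comparison */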
2986 static const int
2987 cc_signed_table [] = {
2988         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
2989         FALSE, FALSE, FALSE, FALSE
2990 };
2991
2992 /*#include "cprop.c"*/
2993
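/*
 * Emit a truncating double->integer conversion (cvttsd2si rounds toward zero).
 * Results narrower than 4 bytes are widened in place afterwards.
 */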
2994 static unsigned char*
2995 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
2996 {
2997         amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
2998
2999         if (size == 1)
3000                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
3001         else if (size == 2)
3002                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
3003         return code;
3004 }
3005
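/*
 * Allocate tree->sreg1 bytes on the stack. On platforms which grow the stack
 * one guard page at a time, every page is touched during the allocation, and
 * the allocated area is zeroed afterwards if MONO_INST_INIT is set.
 */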
3006 static unsigned char*
3007 mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree)
3008 {
3009         int sreg = tree->sreg1;
3010         int need_touch = FALSE;
3011
3012 #if defined(HOST_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3013         if (!(tree->flags & MONO_INST_INIT))
3014                 need_touch = TRUE;
3015 #endif
3016
3017         if (need_touch) {
3018                 guint8* br[5];
3019
3020                 /*
3021                  * Under Windows:
3022                  * If the requested stack size is larger than one page,
3023                  * perform a stack-touch operation.
3024                  */
3025                 /*
3026                  * Generate stack probe code.
3027                  * Under Windows, it is necessary to allocate one page at a time,
3028                  * "touching" the stack after each successful sub-allocation. This is
3029                  * because of the way stack growth is implemented - there is a
3030                  * guard page before the lowest stack page that is currently committed.
3031                  * The stack normally grows sequentially, so the OS traps access to the
3032                  * guard page and commits more pages when needed.
3033                  */
3034                 amd64_test_reg_imm (code, sreg, ~0xFFF);
3035                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
3036
3037                 br[2] = code; /* loop */
3038                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
3039                 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
3040                 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
3041                 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
3042                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
3043                 amd64_patch (br[3], br[2]);
3044                 amd64_test_reg_reg (code, sreg, sreg);
3045                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
3046                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
3047
3048                 br[1] = code; x86_jump8 (code, 0);
3049
3050                 amd64_patch (br[0], code);
3051                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
3052                 amd64_patch (br[1], code);
3053                 amd64_patch (br[4], code);
3054         }
3055         else
3056                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
3057
3058         if (tree->flags & MONO_INST_INIT) {
3059                 int offset = 0;
3060                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
3061                         amd64_push_reg (code, AMD64_RAX);
3062                         offset += 8;
3063                 }
3064                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
3065                         amd64_push_reg (code, AMD64_RCX);
3066                         offset += 8;
3067                 }
3068                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
3069                         amd64_push_reg (code, AMD64_RDI);
3070                         offset += 8;
3071                 }
3072                 
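                /*
                 * Set up rep stos: RCX = count, RAX = fill value (0), RDI = destination.
                 * The SHR by 3 converts the size from bytes into the number of 8 byte
                 * stores done per iteration.
                 */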
3073                 amd64_shift_reg_imm (code, X86_SHR, sreg, 3);
3074                 if (sreg != AMD64_RCX)
3075                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
3076                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3077                                 
3078                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
3079                 if (cfg->param_area && cfg->arch.no_pushes)
3080                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RDI, cfg->param_area);
3081                 amd64_cld (code);
3082                 amd64_prefix (code, X86_REP_PREFIX);
3083                 amd64_stosl (code);
3084                 
3085                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
3086                         amd64_pop_reg (code, AMD64_RDI);
3087                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
3088                         amd64_pop_reg (code, AMD64_RCX);
3089                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
3090                         amd64_pop_reg (code, AMD64_RAX);
3091         }
3092         return code;
3093 }
3094
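/*
 * Move the return value of a call from its fixed ABI location (RAX, XMM0, or a
 * register pair for value types returned in registers) to ins->dreg, or store
 * it into the valuetype return buffer.
 */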
3095 static guint8*
3096 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
3097 {
3098         CallInfo *cinfo;
3099         guint32 quad;
3100
3101         /* Move return value to the target register */
3102         /* FIXME: do this in the local reg allocator */
3103         switch (ins->opcode) {
3104         case OP_CALL:
3105         case OP_CALL_REG:
3106         case OP_CALL_MEMBASE:
3107         case OP_LCALL:
3108         case OP_LCALL_REG:
3109         case OP_LCALL_MEMBASE:
3110                 g_assert (ins->dreg == AMD64_RAX);
3111                 break;
3112         case OP_FCALL:
3113         case OP_FCALL_REG:
3114         case OP_FCALL_MEMBASE:
3115                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
3116                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
3117                 }
3118                 else {
3119                         if (ins->dreg != AMD64_XMM0)
3120                                 amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
3121                 }
3122                 break;
3123         case OP_VCALL:
3124         case OP_VCALL_REG:
3125         case OP_VCALL_MEMBASE:
3126         case OP_VCALL2:
3127         case OP_VCALL2_REG:
3128         case OP_VCALL2_MEMBASE:
3129                 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
3130                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3131                         MonoInst *loc = cfg->arch.vret_addr_loc;
3132
3133                         /* Load the destination address */
3134                         g_assert (loc->opcode == OP_REGOFFSET);
3135                         amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, 8);
3136
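                        /* Store each returned quad into the buffer according to its storage class */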
3137                         for (quad = 0; quad < 2; quad ++) {
3138                                 switch (cinfo->ret.pair_storage [quad]) {
3139                                 case ArgInIReg:
3140                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
3141                                         break;
3142                                 case ArgInFloatSSEReg:
3143                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
3144                                         break;
3145                                 case ArgInDoubleSSEReg:
3146                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
3147                                         break;
3148                                 case ArgNone:
3149                                         break;
3150                                 default:
3151                                         NOT_IMPLEMENTED;
3152                                 }
3153                         }
3154                 }
3155                 break;
3156         }
3157
3158         return code;
3159 }
3160
3161 #endif /* DISABLE_JIT */
3162
3163 /*
3164  * mono_amd64_emit_tls_get:
3165  * @code: buffer to store code to
3166  * @dreg: hard register where to place the result
3167  * @tls_offset: offset info
3168  *
3169  * mono_amd64_emit_tls_get emits in @code the native code that puts in
3170  * the dreg register the item in the thread local storage identified
3171  * by tls_offset.
3172  *
3173  * Returns: a pointer to the end of the stored code
3174  */
3175 guint8*
3176 mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
3177 {
3178 #ifdef HOST_WIN32
3179         g_assert (tls_offset < 64);
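        /* The TEB TlsSlots array, addressed through GS, starts at offset 0x1480 on Win64 */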
3180         x86_prefix (code, X86_GS_PREFIX);
3181         amd64_mov_reg_mem (code, dreg, (tls_offset * 8) + 0x1480, 8);
3182 #else
3183         if (optimize_for_xen) {
3184                 x86_prefix (code, X86_FS_PREFIX);
3185                 amd64_mov_reg_mem (code, dreg, 0, 8);
3186                 amd64_mov_reg_membase (code, dreg, dreg, tls_offset, 8);
3187         } else {
3188                 x86_prefix (code, X86_FS_PREFIX);
3189                 amd64_mov_reg_mem (code, dreg, tls_offset, 8);
3190         }
3191 #endif
3192         return code;
3193 }
3194
3195 #define REAL_PRINT_REG(text,reg) \
3196 mono_assert (reg >= 0); \
3197 amd64_push_reg (code, AMD64_RAX); \
3198 amd64_push_reg (code, AMD64_RDX); \
3199 amd64_push_reg (code, AMD64_RCX); \
3200 amd64_push_reg (code, reg); \
3201 amd64_push_imm (code, reg); \
3202 amd64_push_imm (code, text " %d %p\n"); \
3203 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
3204 amd64_call_reg (code, AMD64_RAX); \
3205 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); \
3206 amd64_pop_reg (code, AMD64_RCX); \
3207 amd64_pop_reg (code, AMD64_RDX); \
3208 amd64_pop_reg (code, AMD64_RAX);
3209
3210 /* benchmark and set based on cpu */
3211 #define LOOP_ALIGNMENT 8
3212 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
3213
3214 #ifndef DISABLE_JIT
3215
3216 void
3217 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3218 {
3219         MonoInst *ins;
3220         MonoCallInst *call;
3221         guint offset;
3222         guint8 *code = cfg->native_code + cfg->code_len;
3223         MonoInst *last_ins = NULL;
3224         guint last_offset = 0;
3225         int max_len;
3226
3227         /* Fix max_offset estimate for each successor bb */
3228         if (cfg->opt & MONO_OPT_BRANCH) {
3229                 int current_offset = cfg->code_len;
3230                 MonoBasicBlock *current_bb;
3231                 for (current_bb = bb; current_bb != NULL; current_bb = current_bb->next_bb) {
3232                         current_bb->max_offset = current_offset;
3233                         current_offset += current_bb->max_length;
3234                 }
3235         }
3236
3237         if (cfg->opt & MONO_OPT_LOOP) {
3238                 int pad, align = LOOP_ALIGNMENT;
3239                 /* set alignment depending on cpu */
3240                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
3241                         pad = align - pad;
3242                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
3243                         amd64_padding (code, pad);
3244                         cfg->code_len += pad;
3245                         bb->native_offset = cfg->code_len;
3246                 }
3247         }
3248
3249         if (cfg->verbose_level > 2)
3250                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
3251
3252         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
3253                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
3254                 g_assert (!cfg->compile_aot);
3255
3256                 cov->data [bb->dfn].cil_code = bb->cil_code;
3257                 amd64_mov_reg_imm (code, AMD64_R11, (guint64)&cov->data [bb->dfn].count);
3258                 /* this is not thread safe, but good enough */
3259                 amd64_inc_membase (code, AMD64_R11, 0);
3260         }
3261
3262         offset = code - cfg->native_code;
3263
3264         mono_debug_open_block (cfg, bb, offset);
3265
3266         if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
3267                 x86_breakpoint (code);
3268
3269         MONO_BB_FOR_EACH_INS (bb, ins) {
3270                 offset = code - cfg->native_code;
3271
3272                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3273
3274                 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
3275                         cfg->code_size *= 2;
3276                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3277                         code = cfg->native_code + offset;
3278                         mono_jit_stats.code_reallocs++;
3279                 }
3280
3281                 if (cfg->debug_info)
3282                         mono_debug_record_line_number (cfg, ins, offset);
3283
3284                 switch (ins->opcode) {
3285                 case OP_BIGMUL:
3286                         amd64_mul_reg (code, ins->sreg2, TRUE);
3287                         break;
3288                 case OP_BIGMUL_UN:
3289                         amd64_mul_reg (code, ins->sreg2, FALSE);
3290                         break;
3291                 case OP_X86_SETEQ_MEMBASE:
3292                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
3293                         break;
3294                 case OP_STOREI1_MEMBASE_IMM:
3295                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
3296                         break;
3297                 case OP_STOREI2_MEMBASE_IMM:
3298                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
3299                         break;
3300                 case OP_STOREI4_MEMBASE_IMM:
3301                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
3302                         break;
3303                 case OP_STOREI1_MEMBASE_REG:
3304                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
3305                         break;
3306                 case OP_STOREI2_MEMBASE_REG:
3307                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
3308                         break;
3309                 case OP_STORE_MEMBASE_REG:
3310                 case OP_STOREI8_MEMBASE_REG:
3311                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
3312                         break;
3313                 case OP_STOREI4_MEMBASE_REG:
3314                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
3315                         break;
3316                 case OP_STORE_MEMBASE_IMM:
3317                 case OP_STOREI8_MEMBASE_IMM:
3318                         g_assert (amd64_is_imm32 (ins->inst_imm));
3319                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
3320                         break;
3321                 case OP_LOAD_MEM:
3322                 case OP_LOADI8_MEM:
3323                         // FIXME: Decompose this earlier
3324                         if (amd64_is_imm32 (ins->inst_imm))
3325                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, sizeof (gpointer));
3326                         else {
3327                                 amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3328                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8);
3329                         }
3330                         break;
3331                 case OP_LOADI4_MEM:
3332                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3333                         amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0);
3334                         break;
3335                 case OP_LOADU4_MEM:
3336                         // FIXME: Decompose this earlier
3337                         if (amd64_is_imm32 (ins->inst_imm))
3338                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
3339                         else {
3340                                 amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3341                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
3342                         }
3343                         break;
3344                 case OP_LOADU1_MEM:
3345                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3346                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE);
3347                         break;
3348                 case OP_LOADU2_MEM:
3349                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3350                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE);
3351                         break;
3352                 case OP_LOAD_MEMBASE:
3353                 case OP_LOADI8_MEMBASE:
3354                         g_assert (amd64_is_imm32 (ins->inst_offset));
3355                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
3356                         break;
3357                 case OP_LOADI4_MEMBASE:
3358                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
3359                         break;
3360                 case OP_LOADU4_MEMBASE:
3361                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
3362                         break;
3363                 case OP_LOADU1_MEMBASE:
3364                         /* The cpu zero extends the result into 64 bits */
3365                         amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE, 4);
3366                         break;
3367                 case OP_LOADI1_MEMBASE:
3368                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
3369                         break;
3370                 case OP_LOADU2_MEMBASE:
3371                         /* The cpu zero extends the result into 64 bits */
3372                         amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE, 4);
3373                         break;
3374                 case OP_LOADI2_MEMBASE:
3375                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
3376                         break;
3377                 case OP_AMD64_LOADI8_MEMINDEX:
3378                         amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
3379                         break;
3380                 case OP_LCONV_TO_I1:
3381                 case OP_ICONV_TO_I1:
3382                 case OP_SEXT_I1:
3383                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3384                         break;
3385                 case OP_LCONV_TO_I2:
3386                 case OP_ICONV_TO_I2:
3387                 case OP_SEXT_I2:
3388                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3389                         break;
3390                 case OP_LCONV_TO_U1:
3391                 case OP_ICONV_TO_U1:
3392                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
3393                         break;
3394                 case OP_LCONV_TO_U2:
3395                 case OP_ICONV_TO_U2:
3396                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
3397                         break;
3398                 case OP_ZEXT_I4:
3399                         /* Clean out the upper 32 bits */
3400                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
3401                         break;
3402                 case OP_SEXT_I4:
3403                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
3404                         break;
3405                 case OP_COMPARE:
3406                 case OP_LCOMPARE:
3407                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3408                         break;
3409                 case OP_COMPARE_IMM:
3410                 case OP_LCOMPARE_IMM:
3411                         g_assert (amd64_is_imm32 (ins->inst_imm));
3412                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
3413                         break;
3414                 case OP_X86_COMPARE_REG_MEMBASE:
3415                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
3416                         break;
3417                 case OP_X86_TEST_NULL:
3418                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
3419                         break;
3420                 case OP_AMD64_TEST_NULL:
3421                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3422                         break;
3423
3424                 case OP_X86_ADD_REG_MEMBASE:
3425                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3426                         break;
3427                 case OP_X86_SUB_REG_MEMBASE:
3428                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3429                         break;
3430                 case OP_X86_AND_REG_MEMBASE:
3431                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3432                         break;
3433                 case OP_X86_OR_REG_MEMBASE:
3434                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3435                         break;
3436                 case OP_X86_XOR_REG_MEMBASE:
3437                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3438                         break;
3439
3440                 case OP_X86_ADD_MEMBASE_IMM:
3441                         /* FIXME: Make a 64 version too */
3442                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3443                         break;
3444                 case OP_X86_SUB_MEMBASE_IMM:
3445                         g_assert (amd64_is_imm32 (ins->inst_imm));
3446                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3447                         break;
3448                 case OP_X86_AND_MEMBASE_IMM:
3449                         g_assert (amd64_is_imm32 (ins->inst_imm));
3450                         amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3451                         break;
3452                 case OP_X86_OR_MEMBASE_IMM:
3453                         g_assert (amd64_is_imm32 (ins->inst_imm));
3454                         amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3455                         break;
3456                 case OP_X86_XOR_MEMBASE_IMM:
3457                         g_assert (amd64_is_imm32 (ins->inst_imm));
3458                         amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3459                         break;
3460                 case OP_X86_ADD_MEMBASE_REG:
3461                         amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3462                         break;
3463                 case OP_X86_SUB_MEMBASE_REG:
3464                         amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3465                         break;
3466                 case OP_X86_AND_MEMBASE_REG:
3467                         amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3468                         break;
3469                 case OP_X86_OR_MEMBASE_REG:
3470                         amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3471                         break;
3472                 case OP_X86_XOR_MEMBASE_REG:
3473                         amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3474                         break;
3475                 case OP_X86_INC_MEMBASE:
3476                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3477                         break;
3478                 case OP_X86_INC_REG:
3479                         amd64_inc_reg_size (code, ins->dreg, 4);
3480                         break;
3481                 case OP_X86_DEC_MEMBASE:
3482                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3483                         break;
3484                 case OP_X86_DEC_REG:
3485                         amd64_dec_reg_size (code, ins->dreg, 4);
3486                         break;
3487                 case OP_X86_MUL_REG_MEMBASE:
3488                 case OP_X86_MUL_MEMBASE_REG:
3489                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3490                         break;
3491                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
3492                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3493                         break;
3494                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
3495                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3496                         break;
3497                 case OP_AMD64_COMPARE_MEMBASE_REG:
3498                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
3499                         break;
3500                 case OP_AMD64_COMPARE_MEMBASE_IMM:
3501                         g_assert (amd64_is_imm32 (ins->inst_imm));
3502                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
3503                         break;
3504                 case OP_X86_COMPARE_MEMBASE8_IMM:
3505                         amd64_alu_membase8_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3506                         break;
3507                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
3508                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3509                         break;
3510                 case OP_AMD64_COMPARE_REG_MEMBASE:
3511                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
3512                         break;
3513
3514                 case OP_AMD64_ADD_REG_MEMBASE:
3515                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
3516                         break;
3517                 case OP_AMD64_SUB_REG_MEMBASE:
3518                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
3519                         break;
3520                 case OP_AMD64_AND_REG_MEMBASE:
3521                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
3522                         break;
3523                 case OP_AMD64_OR_REG_MEMBASE:
3524                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
3525                         break;
3526                 case OP_AMD64_XOR_REG_MEMBASE:
3527                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
3528                         break;
3529
3530                 case OP_AMD64_ADD_MEMBASE_REG:
3531                         amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
3532                         break;
3533                 case OP_AMD64_SUB_MEMBASE_REG:
3534                         amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
3535                         break;
3536                 case OP_AMD64_AND_MEMBASE_REG:
3537                         amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
3538                         break;
3539                 case OP_AMD64_OR_MEMBASE_REG:
3540                         amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
3541                         break;
3542                 case OP_AMD64_XOR_MEMBASE_REG:
3543                         amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
3544                         break;
3545
3546                 case OP_AMD64_ADD_MEMBASE_IMM:
3547                         g_assert (amd64_is_imm32 (ins->inst_imm));
3548                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
3549                         break;
3550                 case OP_AMD64_SUB_MEMBASE_IMM:
3551                         g_assert (amd64_is_imm32 (ins->inst_imm));
3552                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
3553                         break;
3554                 case OP_AMD64_AND_MEMBASE_IMM:
3555                         g_assert (amd64_is_imm32 (ins->inst_imm));
3556                         amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
3557                         break;
3558                 case OP_AMD64_OR_MEMBASE_IMM:
3559                         g_assert (amd64_is_imm32 (ins->inst_imm));
3560                         amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
3561                         break;
3562                 case OP_AMD64_XOR_MEMBASE_IMM:
3563                         g_assert (amd64_is_imm32 (ins->inst_imm));
3564                         amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
3565                         break;
3566
3567                 case OP_BREAK:
3568                         amd64_breakpoint (code);
3569                         break;
3570                 case OP_RELAXED_NOP:
3571                         x86_prefix (code, X86_REP_PREFIX);
3572                         x86_nop (code);
3573                         break;
3574                 case OP_HARD_NOP:
3575                         x86_nop (code);
3576                         break;
3577                 case OP_NOP:
3578                 case OP_DUMMY_USE:
3579                 case OP_DUMMY_STORE:
3580                 case OP_NOT_REACHED:
3581                 case OP_NOT_NULL:
3582                         break;
3583                 case OP_SEQ_POINT: {
3584                         int i;
3585
3586                         if (cfg->compile_aot)
3587                                 NOT_IMPLEMENTED;
3588
3589                         /* 
3590                          * Read from the single stepping trigger page. This will cause a
3591                          * SIGSEGV when single stepping is enabled.
3592                          * We do this _before_ the breakpoint, so single stepping after
3593                          * a breakpoint is hit will step to the next IL offset.
3594                          */
3595                         if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
3596                                 if (((guint64)ss_trigger_page >> 32) == 0)
3597                                         amd64_mov_reg_mem (code, AMD64_R11, (guint64)ss_trigger_page, 4);
3598                                 else {
3599                                         MonoInst *var = cfg->arch.ss_trigger_page_var;
3600
3601                                         amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
3602                                         amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R11, 0, 0, 4);
3603                                 }
3604                         }
3605
3606                         /* 
3607                          * This is the address which is saved in seq points, 
3608                          * get_ip_for_single_step () / get_ip_for_breakpoint () needs to compute this
3609                          * from the address of the instruction causing the fault.
3610                          */
3611                         mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
3612
3613                         /* 
3614                          * A placeholder for a possible breakpoint inserted by
3615                          * mono_arch_set_breakpoint ().
3616                          */
3617                         for (i = 0; i < breakpoint_size; ++i)
3618                                 x86_nop (code);
3619                         break;
3620                 }
3621                 case OP_ADDCC:
3622                 case OP_LADD:
3623                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
3624                         break;
3625                 case OP_ADC:
3626                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
3627                         break;
3628                 case OP_ADD_IMM:
3629                 case OP_LADD_IMM:
3630                         g_assert (amd64_is_imm32 (ins->inst_imm));
3631                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
3632                         break;
3633                 case OP_ADC_IMM:
3634                         g_assert (amd64_is_imm32 (ins->inst_imm));
3635                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
3636                         break;
3637                 case OP_SUBCC:
3638                 case OP_LSUB:
3639                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
3640                         break;
3641                 case OP_SBB:
3642                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
3643                         break;
3644                 case OP_SUB_IMM:
3645                 case OP_LSUB_IMM:
3646                         g_assert (amd64_is_imm32 (ins->inst_imm));
3647                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
3648                         break;
3649                 case OP_SBB_IMM:
3650                         g_assert (amd64_is_imm32 (ins->inst_imm));
3651                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
3652                         break;
3653                 case OP_LAND:
3654                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
3655                         break;
3656                 case OP_AND_IMM:
3657                 case OP_LAND_IMM:
3658                         g_assert (amd64_is_imm32 (ins->inst_imm));
3659                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
3660                         break;
3661                 case OP_LMUL:
3662                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3663                         break;
3664                 case OP_MUL_IMM:
3665                 case OP_LMUL_IMM:
3666                 case OP_IMUL_IMM: {
3667                         guint32 size = (ins->opcode == OP_IMUL_IMM) ? 4 : 8;
3668                         
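                        /*
                         * Strength-reduce multiplications by selected small constants into
                         * LEA/ADD/SHL sequences, which are cheaper than a full IMUL.
                         */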
3669                         switch (ins->inst_imm) {
3670                         case 2:
3671                                 /* MOV r1, r2 */
3672                                 /* ADD r1, r1 */
3673                                 if (ins->dreg != ins->sreg1)
3674                                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, size);
3675                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
3676                                 break;
3677                         case 3:
3678                                 /* LEA r1, [r2 + r2*2] */
3679                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
3680                                 break;
3681                         case 5:
3682                                 /* LEA r1, [r2 + r2*4] */
3683                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
3684                                 break;
3685                         case 6:
3686                                 /* LEA r1, [r2 + r2*2] */
3687                                 /* ADD r1, r1          */
3688                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
3689                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
3690                                 break;
3691                         case 9:
3692                                 /* LEA r1, [r2 + r2*8] */
3693                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
3694                                 break;
3695                         case 10:
3696                                 /* LEA r1, [r2 + r2*4] */
3697                                 /* ADD r1, r1          */
3698                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
3699                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
3700                                 break;
3701                         case 12:
3702                                 /* LEA r1, [r2 + r2*2] */
3703                                 /* SHL r1, 2           */
3704                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
3705                                 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
3706                                 break;
3707                         case 25:
3708                                 /* LEA r1, [r2 + r2*4] */
3709                                 /* LEA r1, [r1 + r1*4] */
3710                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
3711                                 amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
3712                                 break;
3713                         case 100:
3714                                 /* LEA r1, [r2 + r2*4] */
3715                                 /* SHL r1, 2           */
3716                                 /* LEA r1, [r1 + r1*4] */
3717                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
3718                                 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
3719                                 amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
3720                                 break;
3721                         default:
3722                                 amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, size);
3723                                 break;
3724                         }
3725                         break;
3726                 }
3727                 case OP_LDIV:
3728                 case OP_LREM:
3729                         /* Regalloc magic makes the div/rem cases the same */
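                        /* idiv implicitly takes its dividend in RDX:RAX, so a divisor allocated
                         * to RDX has to be spilled below the stack pointer before cdq clobbers it */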
3730                         if (ins->sreg2 == AMD64_RDX) {
3731                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
3732                                 amd64_cdq (code);
3733                                 amd64_div_membase (code, AMD64_RSP, -8, TRUE);
3734                         } else {
3735                                 amd64_cdq (code);
3736                                 amd64_div_reg (code, ins->sreg2, TRUE);
3737                         }
3738                         break;
3739                 case OP_LDIV_UN:
3740                 case OP_LREM_UN:
3741                         if (ins->sreg2 == AMD64_RDX) {
3742                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
3743                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3744                                 amd64_div_membase (code, AMD64_RSP, -8, FALSE);
3745                         } else {
3746                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3747                                 amd64_div_reg (code, ins->sreg2, FALSE);
3748                         }
3749                         break;
3750                 case OP_IDIV:
3751                 case OP_IREM:
3752                         if (ins->sreg2 == AMD64_RDX) {
3753                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
3754                                 amd64_cdq_size (code, 4);
3755                                 amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
3756                         } else {
3757                                 amd64_cdq_size (code, 4);
3758                                 amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
3759                         }
3760                         break;
3761                 case OP_IDIV_UN:
3762                 case OP_IREM_UN:
3763                         if (ins->sreg2 == AMD64_RDX) {
3764                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
3765                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3766                                 amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
3767                         } else {
3768                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3769                                 amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
3770                         }
3771                         break;
3772                 case OP_IREM_IMM: {
3773                         int power = mono_is_power_of_two (ins->inst_imm);
3774
3775                         g_assert (ins->sreg1 == X86_EAX);
3776                         g_assert (ins->dreg == X86_EAX);
3777                         g_assert (power >= 0);
3778
3779                         if (power == 0) {
3780                                 amd64_mov_reg_imm (code, ins->dreg, 0);
3781                                 break;
3782                         }
3783
3784                         /* Based on gcc code */
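                             /*
                              * Computes x % 2^power without IDIV. E.g. for power == 3
                              * and x == -13:
                              *   edx = x >> 31               ; -1
                              *   edx = edx >>> (32 - power)  ; 7
                              *   eax = x + edx               ; -6
                              *   eax &= 2^power - 1          ; 2
                              *   eax -= edx                  ; -5   (-13 % 8 == -5)
                              * For x >= 0, edx is 0 and the AND alone suffices.
                              */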
3785
3786                         /* Add compensation for negative dividends */
3787                         amd64_mov_reg_reg_size (code, AMD64_RDX, AMD64_RAX, 4);
3788                         if (power > 1)
3789                                 amd64_shift_reg_imm_size (code, X86_SAR, AMD64_RDX, 31, 4);
3790                         amd64_shift_reg_imm_size (code, X86_SHR, AMD64_RDX, 32 - power, 4);
3791                         amd64_alu_reg_reg_size (code, X86_ADD, AMD64_RAX, AMD64_RDX, 4);
3792                         /* Compute remainder */
3793                         amd64_alu_reg_imm_size (code, X86_AND, AMD64_RAX, (1 << power) - 1, 4);
3794                         /* Remove compensation */
3795                         amd64_alu_reg_reg_size (code, X86_SUB, AMD64_RAX, AMD64_RDX, 4);
3796                         break;
3797                 }
3798                 case OP_LMUL_OVF:
3799                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3800                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3801                         break;
3802                 case OP_LOR:
3803                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3804                         break;
3805                 case OP_OR_IMM:
3806                 case OP_LOR_IMM:
3807                         g_assert (amd64_is_imm32 (ins->inst_imm));
3808                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3809                         break;
3810                 case OP_LXOR:
3811                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3812                         break;
3813                 case OP_XOR_IMM:
3814                 case OP_LXOR_IMM:
3815                         g_assert (amd64_is_imm32 (ins->inst_imm));
3816                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3817                         break;
3818                 case OP_LSHL:
3819                         g_assert (ins->sreg2 == AMD64_RCX);
3820                         amd64_shift_reg (code, X86_SHL, ins->dreg);
3821                         break;
3822                 case OP_LSHR:
3823                         g_assert (ins->sreg2 == AMD64_RCX);
3824                         amd64_shift_reg (code, X86_SAR, ins->dreg);
3825                         break;
3826                 case OP_SHR_IMM:
3827                         g_assert (amd64_is_imm32 (ins->inst_imm));
3828                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3829                         break;
3830                 case OP_LSHR_IMM:
3831                         g_assert (amd64_is_imm32 (ins->inst_imm));
3832                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3833                         break;
3834                 case OP_SHR_UN_IMM:
3835                         g_assert (amd64_is_imm32 (ins->inst_imm));
3836                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3837                         break;
3838                 case OP_LSHR_UN_IMM:
3839                         g_assert (amd64_is_imm32 (ins->inst_imm));
3840                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3841                         break;
3842                 case OP_LSHR_UN:
3843                         g_assert (ins->sreg2 == AMD64_RCX);
3844                         amd64_shift_reg (code, X86_SHR, ins->dreg);
3845                         break;
3846                 case OP_SHL_IMM:
3847                         g_assert (amd64_is_imm32 (ins->inst_imm));
3848                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3849                         break;
3850                 case OP_LSHL_IMM:
3851                         g_assert (amd64_is_imm32 (ins->inst_imm));
3852                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3853                         break;
3854
3855                 case OP_IADDCC:
3856                 case OP_IADD:
3857                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
3858                         break;
3859                 case OP_IADC:
3860                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
3861                         break;
3862                 case OP_IADD_IMM:
3863                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
3864                         break;
3865                 case OP_IADC_IMM:
3866                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
3867                         break;
3868                 case OP_ISUBCC:
3869                 case OP_ISUB:
3870                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
3871                         break;
3872                 case OP_ISBB:
3873                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
3874                         break;
3875                 case OP_ISUB_IMM:
3876                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
3877                         break;
3878                 case OP_ISBB_IMM:
3879                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
3880                         break;
3881                 case OP_IAND:
3882                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
3883                         break;
3884                 case OP_IAND_IMM:
3885                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
3886                         break;
3887                 case OP_IOR:
3888                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
3889                         break;
3890                 case OP_IOR_IMM:
3891                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
3892                         break;
3893                 case OP_IXOR:
3894                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
3895                         break;
3896                 case OP_IXOR_IMM:
3897                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
3898                         break;
3899                 case OP_INEG:
3900                         amd64_neg_reg_size (code, ins->sreg1, 4);
3901                         break;
3902                 case OP_INOT:
3903                         amd64_not_reg_size (code, ins->sreg1, 4);
3904                         break;
3905                 case OP_ISHL:
3906                         g_assert (ins->sreg2 == AMD64_RCX);
3907                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
3908                         break;
3909                 case OP_ISHR:
3910                         g_assert (ins->sreg2 == AMD64_RCX);
3911                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
3912                         break;
3913                 case OP_ISHR_IMM:
3914                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3915                         break;
3916                 case OP_ISHR_UN_IMM:
3917                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3918                         break;
3919                 case OP_ISHR_UN:
3920                         g_assert (ins->sreg2 == AMD64_RCX);
3921                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3922                         break;
3923                 case OP_ISHL_IMM:
3924                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3925                         break;
3926                 case OP_IMUL:
3927                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3928                         break;
3929                 case OP_IMUL_OVF:
3930                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3931                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3932                         break;
3933                 case OP_IMUL_OVF_UN:
3934                 case OP_LMUL_OVF_UN: {
3935                         /* the mul operation and the exception check should most likely be split */
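                             /*
                              * MUL has fixed operands: it multiplies RAX by the given
                              * register, leaves the result in RDX:RAX and sets the
                              * overflow flag when the high half is non-zero, hence
                              * the EAX/EDX shuffling below.
                              */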
3936                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3937                         int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
3938                         /*g_assert (ins->sreg2 == X86_EAX);
3939                         g_assert (ins->dreg == X86_EAX);*/
3940                         if (ins->sreg2 == X86_EAX) {
3941                                 non_eax_reg = ins->sreg1;
3942                         } else if (ins->sreg1 == X86_EAX) {
3943                                 non_eax_reg = ins->sreg2;
3944                         } else {
3945                                 /* no need to save since we're going to store to it anyway */
3946                                 if (ins->dreg != X86_EAX) {
3947                                         saved_eax = TRUE;
3948                                         amd64_push_reg (code, X86_EAX);
3949                                 }
3950                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
3951                                 non_eax_reg = ins->sreg2;
3952                         }
3953                         if (ins->dreg == X86_EDX) {
3954                                 if (!saved_eax) {
3955                                         saved_eax = TRUE;
3956                                         amd64_push_reg (code, X86_EAX);
3957                                 }
3958                         } else {
3959                                 saved_edx = TRUE;
3960                                 amd64_push_reg (code, X86_EDX);
3961                         }
3962                         amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
3963                         /* save before the check since pop and mov don't change the flags */
3964                         if (ins->dreg != X86_EAX)
3965                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
3966                         if (saved_edx)
3967                                 amd64_pop_reg (code, X86_EDX);
3968                         if (saved_eax)
3969                                 amd64_pop_reg (code, X86_EAX);
3970                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3971                         break;
3972                 }
3973                 case OP_ICOMPARE:
3974                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3975                         break;
3976                 case OP_ICOMPARE_IMM:
3977                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
3978                         break;
3979                 case OP_IBEQ:
3980                 case OP_IBLT:
3981                 case OP_IBGT:
3982                 case OP_IBGE:
3983                 case OP_IBLE:
3984                 case OP_LBEQ:
3985                 case OP_LBLT:
3986                 case OP_LBGT:
3987                 case OP_LBGE:
3988                 case OP_LBLE:
3989                 case OP_IBNE_UN:
3990                 case OP_IBLT_UN:
3991                 case OP_IBGT_UN:
3992                 case OP_IBGE_UN:
3993                 case OP_IBLE_UN:
3994                 case OP_LBNE_UN:
3995                 case OP_LBLT_UN:
3996                 case OP_LBGT_UN:
3997                 case OP_LBGE_UN:
3998                 case OP_LBLE_UN:
3999                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
4000                         break;
4001
4002                 case OP_CMOV_IEQ:
4003                 case OP_CMOV_IGE:
4004                 case OP_CMOV_IGT:
4005                 case OP_CMOV_ILE:
4006                 case OP_CMOV_ILT:
4007                 case OP_CMOV_INE_UN:
4008                 case OP_CMOV_IGE_UN:
4009                 case OP_CMOV_IGT_UN:
4010                 case OP_CMOV_ILE_UN:
4011                 case OP_CMOV_ILT_UN:
4012                 case OP_CMOV_LEQ:
4013                 case OP_CMOV_LGE:
4014                 case OP_CMOV_LGT:
4015                 case OP_CMOV_LLE:
4016                 case OP_CMOV_LLT:
4017                 case OP_CMOV_LNE_UN:
4018                 case OP_CMOV_LGE_UN:
4019                 case OP_CMOV_LGT_UN:
4020                 case OP_CMOV_LLE_UN:
4021                 case OP_CMOV_LLT_UN:
4022                         g_assert (ins->dreg == ins->sreg1);
4023                         /* This needs to operate on 64 bit values */
4024                         amd64_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
4025                         break;
4026
4027                 case OP_LNOT:
4028                         amd64_not_reg (code, ins->sreg1);
4029                         break;
4030                 case OP_LNEG:
4031                         amd64_neg_reg (code, ins->sreg1);
4032                         break;
4033
4034                 case OP_ICONST:
4035                 case OP_I8CONST:
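                             /*
                              * 32-bit moves zero-extend on amd64, so the shorter
                              * 4-byte immediate encoding can be used whenever the
                              * upper 32 bits of the constant are clear.
                              */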
4036                         if ((((guint64)ins->inst_c0) >> 32) == 0)
4037                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
4038                         else
4039                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
4040                         break;
4041                 case OP_AOTCONST:
4042                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
4043                         amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8);
4044                         break;
4045                 case OP_JUMP_TABLE:
4046                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
4047                         amd64_mov_reg_imm_size (code, ins->dreg, 0, 8);
4048                         break;
4049                 case OP_MOVE:
4050                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
4051                         break;
4052                 case OP_AMD64_SET_XMMREG_R4: {
4053                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
4054                         break;
4055                 }
4056                 case OP_AMD64_SET_XMMREG_R8: {
4057                         if (ins->dreg != ins->sreg1)
4058                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
4059                         break;
4060                 }
4061                 case OP_TAILCALL: {
4062                         /*
4063                          * Note: this 'frame destruction' logic is useful for tail calls, too.
4064                          * Keep in sync with the code in emit_epilog.
4065                          */
4066                         int pos = 0, i;
4067
4068                         /* FIXME: no tracing support... */
4069                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4070                                 code = mono_arch_instrument_epilog_full (cfg, mono_profiler_method_leave, code, FALSE, FALSE);
4071
4072                         g_assert (!cfg->method->save_lmf);
4073
4074                         if (cfg->arch.omit_fp) {
4075                                 guint32 save_offset = 0;
4076                                 /* Pop callee-saved registers */
4077                                 for (i = 0; i < AMD64_NREG; ++i)
4078                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4079                                                 amd64_mov_reg_membase (code, i, AMD64_RSP, save_offset, 8);
4080                                                 save_offset += 8;
4081                                         }
4082                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
4083                         }
4084                         else {
4085                                 for (i = 0; i < AMD64_NREG; ++i)
4086                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4087                                                 pos -= sizeof (gpointer);
4088                         
4089                                 if (pos)
4090                                         amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4091
4092                                 /* Pop registers in reverse order */
4093                                 for (i = AMD64_NREG - 1; i > 0; --i)
4094                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4095                                                 amd64_pop_reg (code, i);
4096                                         }
4097
4098                                 amd64_leave (code);
4099                         }
4100
4101                         offset = code - cfg->native_code;
4102                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
4103                         if (cfg->compile_aot)
4104                                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
4105                         else
4106                                 amd64_set_reg_template (code, AMD64_R11);
4107                         amd64_jump_reg (code, AMD64_R11);
4108                         break;
4109                 }
4110                 case OP_CHECK_THIS:
4111                         /* ensure ins->sreg1 is not NULL */
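                             /* The memory read faults if sreg1 is NULL; the fault is
                              * turned into a NullReferenceException. */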
4112                         amd64_alu_membase_imm_size (code, X86_CMP, ins->sreg1, 0, 0, 4);
4113                         break;
4114                 case OP_ARGLIST: {
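                             /* Store the address of the sig cookie (the start of the
                              * vararg area) into the location pointed to by sreg1. */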
4115                         amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie);
4116                         amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8);
4117                         break;
4118                 }
4119                 case OP_CALL:
4120                 case OP_FCALL:
4121                 case OP_LCALL:
4122                 case OP_VCALL:
4123                 case OP_VCALL2:
4124                 case OP_VOIDCALL:
4125                         call = (MonoCallInst*)ins;
4126                         /*
4127                          * The AMD64 ABI forces callers to know about varargs.
4128                          */
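                             /* %al must hold the number of SSE registers used to pass
                              * the arguments, so it is set even when it is zero. */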
4129                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
4130                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4131                         else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
4132                                 /* 
4133                                  * Since the unmanaged calling convention doesn't contain a 
4134                                  * 'vararg' entry, we have to treat every pinvoke call as a
4135                                  * potential vararg call.
4136                                  */
4137                                 guint32 nregs, i;
4138                                 nregs = 0;
4139                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
4140                                         if (call->used_fregs & (1 << i))
4141                                                 nregs ++;
4142                                 if (!nregs)
4143                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4144                                 else
4145                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
4146                         }
4147
4148                         if (ins->flags & MONO_INST_HAS_METHOD)
4149                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE);
4150                         else
4151                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE);
4152                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention) && !cfg->arch.no_pushes)
4153                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
4154                         code = emit_move_return_value (cfg, ins, code);
4155                         break;
4156                 case OP_FCALL_REG:
4157                 case OP_LCALL_REG:
4158                 case OP_VCALL_REG:
4159                 case OP_VCALL2_REG:
4160                 case OP_VOIDCALL_REG:
4161                 case OP_CALL_REG:
4162                         call = (MonoCallInst*)ins;
4163
4164                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
4165                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
4166                                 ins->sreg1 = AMD64_R11;
4167                         }
4168
4169                         /*
4170                          * The AMD64 ABI forces callers to know about varargs.
4171                          */
4172                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
4173                                 if (ins->sreg1 == AMD64_RAX) {
4174                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
4175                                         ins->sreg1 = AMD64_R11;
4176                                 }
4177                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4178                         } else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
4179                                 /* 
4180                                  * Since the unmanaged calling convention doesn't contain a 
4181                                  * 'vararg' entry, we have to treat every pinvoke call as a
4182                                  * potential vararg call.
4183                                  */
4184                                 guint32 nregs, i;
4185                                 nregs = 0;
4186                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
4187                                         if (call->used_fregs & (1 << i))
4188                                                 nregs ++;
4189                                 if (ins->sreg1 == AMD64_RAX) {
4190                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
4191                                         ins->sreg1 = AMD64_R11;
4192                                 }
4193                                 if (!nregs)
4194                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4195                                 else
4196                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
4197                         }
4198
4199                         amd64_call_reg (code, ins->sreg1);
4200                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention) && !cfg->arch.no_pushes)
4201                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
4202                         code = emit_move_return_value (cfg, ins, code);
4203                         break;
4204                 case OP_FCALL_MEMBASE:
4205                 case OP_LCALL_MEMBASE:
4206                 case OP_VCALL_MEMBASE:
4207                 case OP_VCALL2_MEMBASE:
4208                 case OP_VOIDCALL_MEMBASE:
4209                 case OP_CALL_MEMBASE:
4210                         call = (MonoCallInst*)ins;
4211
4212                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
4213                                 /* 
4214                                  * Can't use R11 because it is clobbered by the trampoline 
4215                                  * code, and the reg value is needed by get_vcall_slot_addr.
4216                                  */
4217                                 amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
4218                                 ins->sreg1 = AMD64_RAX;
4219                         }
4220
4221                         /* 
4222                          * Emit a few nops to simplify get_vcall_slot ().
4223                          */
4224                         amd64_nop (code);
4225                         amd64_nop (code);
4226                         amd64_nop (code);
4227
4228                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
4229                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention) && !cfg->arch.no_pushes)
4230                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
4231                         code = emit_move_return_value (cfg, ins, code);
4232                         break;
4233                 case OP_DYN_CALL: {
4234                         int i;
4235                         MonoInst *var = cfg->dyn_call_var;
4236
4237                         g_assert (var->opcode == OP_REGOFFSET);
4238
4239                         /* r11 = args buffer filled by mono_arch_get_dyn_call_args () */
4240                         amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
4241                         /* r10 = ftn */
4242                         amd64_mov_reg_reg (code, AMD64_R10, ins->sreg2, 8);
4243
4244                         /* Save args buffer */
4245                         amd64_mov_membase_reg (code, var->inst_basereg, var->inst_offset, AMD64_R11, 8);
4246
4247                         /* Set argument registers */
4248                         for (i = 0; i < PARAM_REGS; ++i)
4249                                 amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, i * sizeof (gpointer), 8);
4250                         
4251                         /* Make the call */
4252                         amd64_call_reg (code, AMD64_R10);
4253
4254                         /* Save result */
4255                         amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
4256                         amd64_mov_membase_reg (code, AMD64_R11, G_STRUCT_OFFSET (DynCallArgs, res), AMD64_RAX, 8);
4257                         break;
4258                 }
4259                 case OP_AMD64_SAVE_SP_TO_LMF:
4260                         amd64_mov_membase_reg (code, cfg->frame_reg, cfg->arch.lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
4261                         break;
4262                 case OP_X86_PUSH:
4263                         g_assert (!cfg->arch.no_pushes);
4264                         amd64_push_reg (code, ins->sreg1);
4265                         break;
4266                 case OP_X86_PUSH_IMM:
4267                         g_assert (!cfg->arch.no_pushes);
4268                         g_assert (amd64_is_imm32 (ins->inst_imm));
4269                         amd64_push_imm (code, ins->inst_imm);
4270                         break;
4271                 case OP_X86_PUSH_MEMBASE:
4272                         g_assert (!cfg->arch.no_pushes);
4273                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
4274                         break;
4275                 case OP_X86_PUSH_OBJ: {
4276                         int size = ALIGN_TO (ins->inst_imm, 8);
4277
4278                         g_assert (!cfg->arch.no_pushes);
4279
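                             /* The object is copied onto the stack with REP MOVS,
                              * which clobbers RDI/RSI/RCX, so those are saved around
                              * the copy. */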
4280                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
4281                         amd64_push_reg (code, AMD64_RDI);
4282                         amd64_push_reg (code, AMD64_RSI);
4283                         amd64_push_reg (code, AMD64_RCX);
4284                         if (ins->inst_offset)
4285                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
4286                         else
4287                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
4288                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8));
4289                         amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3));
4290                         amd64_cld (code);
4291                         amd64_prefix (code, X86_REP_PREFIX);
4292                         amd64_movsd (code);
4293                         amd64_pop_reg (code, AMD64_RCX);
4294                         amd64_pop_reg (code, AMD64_RSI);
4295                         amd64_pop_reg (code, AMD64_RDI);
4296                         break;
4297                 }
4298                 case OP_X86_LEA:
4299                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
4300                         break;
4301                 case OP_X86_LEA_MEMBASE:
4302                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
4303                         break;
4304                 case OP_X86_XCHG:
4305                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
4306                         break;
4307                 case OP_LOCALLOC:
4308                         /* keep alignment */
4309                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
4310                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
4311                         code = mono_emit_stack_alloc (cfg, code, ins);
4312                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
4313                         if (cfg->param_area && cfg->arch.no_pushes)
4314                                 amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
4315                         break;
4316                 case OP_LOCALLOC_IMM: {
4317                         guint32 size = ins->inst_imm;
4318                         size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
4319
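                             /* Blocks which need initialization are zeroed inline
                              * when small; larger ones go through
                              * mono_emit_stack_alloc (). */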
4320                         if (ins->flags & MONO_INST_INIT) {
4321                                 if (size < 64) {
4322                                         int i;
4323
4324                                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
4325                                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4326
4327                                         for (i = 0; i < size; i += 8)
4328                                                 amd64_mov_membase_reg (code, AMD64_RSP, i, ins->dreg, 8);
4329                                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);                                      
4330                                 } else {
4331                                         amd64_mov_reg_imm (code, ins->dreg, size);
4332                                         ins->sreg1 = ins->dreg;
4333
4334                                         code = mono_emit_stack_alloc (cfg, code, ins);
4335                                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
4336                                 }
4337                         } else {
4338                                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
4339                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
4340                         }
4341                         if (cfg->param_area && cfg->arch.no_pushes)
4342                                 amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
4343                         break;
4344                 }
4345                 case OP_THROW: {
4346                         amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
4347                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4348                                              (gpointer)"mono_arch_throw_exception", FALSE);
4349                         break;
4350                 }
4351                 case OP_RETHROW: {
4352                         amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
4353                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4354                                              (gpointer)"mono_arch_rethrow_exception", FALSE);
4355                         break;
4356                 }
4357                 case OP_CALL_HANDLER: 
4358                         /* Align stack */
4359                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4360                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
4361                         amd64_call_imm (code, 0);
4362                         mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
4363                         /* Restore stack alignment */
4364                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4365                         break;
4366                 case OP_START_HANDLER: {
4367                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
4368                         amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, 8);
4369
4370                         if (MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY) &&
4371                                 cfg->param_area &&
4372                                 cfg->arch.no_pushes) {
4373                                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
4374                         }
4375                         break;
4376                 }
4377                 case OP_ENDFINALLY: {
4378                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
4379                         amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8);
4380                         amd64_ret (code);
4381                         break;
4382                 }
4383                 case OP_ENDFILTER: {
4384                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
4385                         amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8);
4386                         /* The local allocator will put the result into RAX */
4387                         amd64_ret (code);
4388                         break;
4389                 }
4390
4391                 case OP_LABEL:
4392                         ins->inst_c0 = code - cfg->native_code;
4393                         break;
4394                 case OP_BR:
4395                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
4396                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
4397                         //break;
4398                         if (ins->inst_target_bb->native_offset) {
4399                                 amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
4400                         } else {
4401                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
4402                                 if ((cfg->opt & MONO_OPT_BRANCH) &&
4403                                     x86_is_imm8 (ins->inst_target_bb->max_offset - offset))
4404                                         x86_jump8 (code, 0);
4405                                 else
4406                                         x86_jump32 (code, 0);
4407                         }
4408                         break;
4409                 case OP_BR_REG:
4410                         amd64_jump_reg (code, ins->sreg1);
4411                         break;
4412                 case OP_CEQ:
4413                 case OP_LCEQ:
4414                 case OP_ICEQ:
4415                 case OP_CLT:
4416                 case OP_LCLT:
4417                 case OP_ICLT:
4418                 case OP_CGT:
4419                 case OP_ICGT:
4420                 case OP_LCGT:
4421                 case OP_CLT_UN:
4422                 case OP_LCLT_UN:
4423                 case OP_ICLT_UN:
4424                 case OP_CGT_UN:
4425                 case OP_LCGT_UN:
4426                 case OP_ICGT_UN:
4427                         amd64_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
4428                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4429                         break;
4430                 case OP_COND_EXC_EQ:
4431                 case OP_COND_EXC_NE_UN:
4432                 case OP_COND_EXC_LT:
4433                 case OP_COND_EXC_LT_UN:
4434                 case OP_COND_EXC_GT:
4435                 case OP_COND_EXC_GT_UN:
4436                 case OP_COND_EXC_GE:
4437                 case OP_COND_EXC_GE_UN:
4438                 case OP_COND_EXC_LE:
4439                 case OP_COND_EXC_LE_UN:
4440                 case OP_COND_EXC_IEQ:
4441                 case OP_COND_EXC_INE_UN:
4442                 case OP_COND_EXC_ILT:
4443                 case OP_COND_EXC_ILT_UN:
4444                 case OP_COND_EXC_IGT:
4445                 case OP_COND_EXC_IGT_UN:
4446                 case OP_COND_EXC_IGE:
4447                 case OP_COND_EXC_IGE_UN:
4448                 case OP_COND_EXC_ILE:
4449                 case OP_COND_EXC_ILE_UN:
4450                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
4451                         break;
4452                 case OP_COND_EXC_OV:
4453                 case OP_COND_EXC_NO:
4454                 case OP_COND_EXC_C:
4455                 case OP_COND_EXC_NC:
4456                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
4457                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
4458                         break;
4459                 case OP_COND_EXC_IOV:
4460                 case OP_COND_EXC_INO:
4461                 case OP_COND_EXC_IC:
4462                 case OP_COND_EXC_INC:
4463                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], 
4464                                                     (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
4465                         break;
4466
4467                 /* floating point opcodes */
4468                 case OP_R8CONST: {
4469                         double d = *(double *)ins->inst_p0;
4470
4471                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
4472                                 amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
4473                         }
4474                         else {
4475                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
4476                                 amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
4477                         }
4478                         break;
4479                 }
4480                 case OP_R4CONST: {
4481                         float f = *(float *)ins->inst_p0;
4482
4483                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
4484                                 amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
4485                         }
4486                         else {
4487                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
4488                                 amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
4489                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4490                         }
4491                         break;
4492                 }
4493                 case OP_STORER8_MEMBASE_REG:
4494                         amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
4495                         break;
4496                 case OP_LOADR8_MEMBASE:
4497                         amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
4498                         break;
4499                 case OP_STORER4_MEMBASE_REG:
4500                         /* This requires a double->single conversion */
4501                         amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1);
4502                         amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15);
4503                         break;
4504                 case OP_LOADR4_MEMBASE:
4505                         amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
4506                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4507                         break;
4508                 case OP_ICONV_TO_R4: /* FIXME: change precision */
4509                 case OP_ICONV_TO_R8:
4510                         amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
4511                         break;
4512                 case OP_LCONV_TO_R4: /* FIXME: change precision */
4513                 case OP_LCONV_TO_R8:
4514                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
4515                         break;
4516                 case OP_FCONV_TO_R4:
4517                         /* FIXME: nothing to do ?? */
4518                         break;
4519                 case OP_FCONV_TO_I1:
4520                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
4521                         break;
4522                 case OP_FCONV_TO_U1:
4523                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
4524                         break;
4525                 case OP_FCONV_TO_I2:
4526                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
4527                         break;
4528                 case OP_FCONV_TO_U2:
4529                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
4530                         break;
4531                 case OP_FCONV_TO_U4:
4532                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);                  
4533                         break;
4534                 case OP_FCONV_TO_I4:
4535                 case OP_FCONV_TO_I:
4536                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
4537                         break;
4538                 case OP_FCONV_TO_I8:
4539                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
4540                         break;
4541                 case OP_LCONV_TO_R_UN: { 
4542                         guint8 *br [2];
4543
4544                         /* Based on gcc code */
4545                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
4546                         br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE);
4547
4548                         /* Positive case */
4549                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
4550                         br [1] = code; x86_jump8 (code, 0);
4551                         amd64_patch (br [0], code);
4552
4553                         /* Negative case */
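                             /*
                              * The top bit is set, so a signed conversion would come
                              * out negative. Convert (x >> 1) | (x & 1) instead and
                              * double the result; OR-ing the shifted-out bit back in
                              * keeps the rounding of the halved value correct.
                              */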
4554                         /* Save to the red zone */
4555                         amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
4556                         amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
4557                         amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8);
4558                         amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
4559                         amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1);
4560                         amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1);
4561                         amd64_alu_reg_reg (code, X86_OR, AMD64_RAX, AMD64_RCX);
4562                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX);
4563                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg);
4564                         /* Restore */
4565                         amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
4566                         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8);
4567                         amd64_patch (br [1], code);
4568                         break;
4569                 }
4570                 case OP_LCONV_TO_OVF_U4:
4571                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
4572                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
4573                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
4574                         break;
4575                 case OP_LCONV_TO_OVF_I4_UN:
4576                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
4577                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
4578                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
4579                         break;
4580                 case OP_FMOVE:
4581                         if (ins->dreg != ins->sreg1)
4582                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
4583                         break;
4584                 case OP_FADD:
4585                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
4586                         break;
4587                 case OP_FSUB:
4588                         amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
4589                         break;          
4590                 case OP_FMUL:
4591                         amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
4592                         break;          
4593                 case OP_FDIV:
4594                         amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
4595                         break;          
4596                 case OP_FNEG: {
4597                         static double r8_0 = -0.0;
4598
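                             /* XORPD with -0.0 (only the sign bit set) flips the
                              * sign of the operand. */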
4599                         g_assert (ins->sreg1 == ins->dreg);
4600                                         
4601                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0);
4602                         amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
4603                         break;
4604                 }
4605                 case OP_SIN:
4606                         EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
4607                         break;          
4608                 case OP_COS:
4609                         EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
4610                         break;          
4611                 case OP_ABS: {
4612                         static guint64 d = 0x7fffffffffffffffUL;
4613
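                             /* ANDPD with 0x7fffffffffffffff clears the sign bit,
                              * i.e. fabs (). */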
4614                         g_assert (ins->sreg1 == ins->dreg);
4615                                         
4616                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &d);
4617                         amd64_sse_andpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
4618                         break;          
4619                 }
4620                 case OP_SQRT:
4621                         EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
4622                         break;
4623                 case OP_IMIN:
4624                         g_assert (cfg->opt & MONO_OPT_CMOV);
4625                         g_assert (ins->dreg == ins->sreg1);
4626                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
4627                         amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4);
4628                         break;
4629                 case OP_IMIN_UN:
4630                         g_assert (cfg->opt & MONO_OPT_CMOV);
4631                         g_assert (ins->dreg == ins->sreg1);
4632                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
4633                         amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4);
4634                         break;
4635                 case OP_IMAX:
4636                         g_assert (cfg->opt & MONO_OPT_CMOV);
4637                         g_assert (ins->dreg == ins->sreg1);
4638                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
4639                         amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4);
4640                         break;
4641                 case OP_IMAX_UN:
4642                         g_assert (cfg->opt & MONO_OPT_CMOV);
4643                         g_assert (ins->dreg == ins->sreg1);
4644                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
4645                         amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4);
4646                         break;
4647                 case OP_LMIN:
4648                         g_assert (cfg->opt & MONO_OPT_CMOV);
4649                         g_assert (ins->dreg == ins->sreg1);
4650                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
4651                         amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
4652                         break;
4653                 case OP_LMIN_UN:
4654                         g_assert (cfg->opt & MONO_OPT_CMOV);
4655                         g_assert (ins->dreg == ins->sreg1);
4656                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
4657                         amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
4658                         break;
4659                 case OP_LMAX:
4660                         g_assert (cfg->opt & MONO_OPT_CMOV);
4661                         g_assert (ins->dreg == ins->sreg1);
4662                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
4663                         amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
4664                         break;
4665                 case OP_LMAX_UN:
4666                         g_assert (cfg->opt & MONO_OPT_CMOV);
4667                         g_assert (ins->dreg == ins->sreg1);
4668                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
4669                         amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
4670                         break;  
4671                 case OP_X86_FPOP:
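                             /* A no-op: with SSE2 there is no x87 stack to pop. */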
4672                         break;          
4673                 case OP_FCOMPARE:
4674                         /* 
4675                          * The two arguments are swapped because the fbranch instructions
4676                          * depend on this for the non-sse case to work.
4677                          */
4678                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
4679                         break;
4680                 case OP_FCEQ: {
4681                         /* zeroing the register at the start results in 
4682                          * shorter and faster code (we can also remove the widening op)
4683                          */
4684                         guchar *unordered_check;
4685                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4686                         amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
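                             /* COMISD sets ZF/PF/CF to 1/1/1 for unordered (NaN)
                              * operands, so PF distinguishes the unordered case. */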
4687                         unordered_check = code;
4688                         x86_branch8 (code, X86_CC_P, 0, FALSE);
4689                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
4690                         amd64_patch (unordered_check, code);
4691                         break;
4692                 }
4693                 case OP_FCLT:
4694                 case OP_FCLT_UN:
4695                         /* zeroing the register at the start results in 
4696                          * shorter and faster code (we can also remove the widening op)
4697                          */
4698                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4699                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
4700                         if (ins->opcode == OP_FCLT_UN) {
4701                                 guchar *unordered_check = code;
4702                                 guchar *jump_to_end;
4703                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4704                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
4705                                 jump_to_end = code;
4706                                 x86_jump8 (code, 0);
4707                                 amd64_patch (unordered_check, code);
4708                                 amd64_inc_reg (code, ins->dreg);
4709                                 amd64_patch (jump_to_end, code);
4710                         } else {
4711                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
4712                         }
4713                         break;
4714                 case OP_FCGT:
4715                 case OP_FCGT_UN: {
4716                         /* zeroing the register at the start results in 
4717                          * shorter and faster code (we can also remove the widening op)
4718                          */
4719                         guchar *unordered_check;
4720                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4721                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
4722                         if (ins->opcode == OP_FCGT) {
4723                                 unordered_check = code;
4724                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4725                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4726                                 amd64_patch (unordered_check, code);
4727                         } else {
4728                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4729                         }
4730                         break;
4731                 }
4732                 case OP_FCLT_MEMBASE:
4733                 case OP_FCGT_MEMBASE:
4734                 case OP_FCLT_UN_MEMBASE:
4735                 case OP_FCGT_UN_MEMBASE:
4736                 case OP_FCEQ_MEMBASE: {
4737                         guchar *unordered_check, *jump_to_end;
4738                         int x86_cond;
4739
4740                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4741                         amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
4742
4743                         switch (ins->opcode) {
4744                         case OP_FCEQ_MEMBASE:
4745                                 x86_cond = X86_CC_EQ;
4746                                 break;
4747                         case OP_FCLT_MEMBASE:
4748                         case OP_FCLT_UN_MEMBASE:
4749                                 x86_cond = X86_CC_LT;
4750                                 break;
4751                         case OP_FCGT_MEMBASE:
4752                         case OP_FCGT_UN_MEMBASE:
4753                                 x86_cond = X86_CC_GT;
4754                                 break;
4755                         default:
4756                                 g_assert_not_reached ();
4757                         }
4758
4759                         unordered_check = code;
4760                         x86_branch8 (code, X86_CC_P, 0, FALSE);
4761                         amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
4762
4763                         switch (ins->opcode) {
4764                         case OP_FCEQ_MEMBASE:
4765                         case OP_FCLT_MEMBASE:
4766                         case OP_FCGT_MEMBASE:
4767                                 amd64_patch (unordered_check, code);
4768                                 break;
4769                         case OP_FCLT_UN_MEMBASE:
4770                         case OP_FCGT_UN_MEMBASE:
4771                                 jump_to_end = code;
4772                                 x86_jump8 (code, 0);
4773                                 amd64_patch (unordered_check, code);
4774                                 amd64_inc_reg (code, ins->dreg);
4775                                 amd64_patch (jump_to_end, code);
4776                                 break;
4777                         default:
4778                                 break;
4779                         }
4780                         break;
4781                 }
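                     /*
                      * The C013 notation in the fbranch comments below refers to the x87
                      * condition bits; after comisd, ZF/PF/CF carry the equivalent
                      * information (C3/C2/C0), with PF=1 meaning unordered.
                      */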
4782                 case OP_FBEQ: {
4783                         guchar *jump = code;
4784                         x86_branch8 (code, X86_CC_P, 0, TRUE);
4785                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4786                         amd64_patch (jump, code);
4787                         break;
4788                 }
4789                 case OP_FBNE_UN:
4790                         /* Branch if C013 != 100 */
4791                         /* branch if !ZF or (PF|CF) */
4792                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4793                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4794                         EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
4795                         break;
4796                 case OP_FBLT:
4797                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4798                         break;
4799                 case OP_FBLT_UN:
4800                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4801                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4802                         break;
4803                 case OP_FBGT:
4804                 case OP_FBGT_UN:
4805                         if (ins->opcode == OP_FBGT) {
4806                                 guchar *br1;
4807
4808                                 /* skip branch if PF=1 (unordered) */
4809                                 br1 = code;
4810                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4811                                 /* branch if (C0 | C3) = 1 */
4812                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
4813                                 amd64_patch (br1, code);
4814                                 break;
4815                         } else {
4816                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
4817                         }
4818                         break;
4819                 case OP_FBGE: {
4820                         /* Branch if C013 == 100 or 001 */
4821                         guchar *br1;
4822
4823                         /* skip branch if PF=1 (unordered) */
4824                         br1 = code;
4825                         x86_branch8 (code, X86_CC_P, 0, FALSE);
4826                         /* branch if (C0 | C3) = 1 */
4827                         EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
4828                         amd64_patch (br1, code);
4829                         break;
4830                 }
4831                 case OP_FBGE_UN:
4832                         /* Branch if C013 == 000 */
4833                         EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
4834                         break;
4835                 case OP_FBLE: {
4836                         /* Branch if C013=000 or 100 */
4837                         guchar *br1;
4838
4839                         /* skip branch if PF=1 (unordered) */
4840                         br1 = code;
4841                         x86_branch8 (code, X86_CC_P, 0, FALSE);
4842                         /* branch if C0=0 */
4843                         EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
4844                         amd64_patch (br1, code);
4845                         break;
4846                 }
4847                 case OP_FBLE_UN:
4848                         /* Branch if C013 != 001 */
4849                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4850                         EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4851                         break;
4852                 case OP_CKFINITE:
4853                         /* Transfer value to the fp stack */
4854                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
4855                         amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
4856                         amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
4857
4858                         amd64_push_reg (code, AMD64_RAX);
4859                         amd64_fxam (code);
4860                         amd64_fnstsw (code);
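                             /*
                              * fxam classifies ST(0) via C3/C2/C0. Masking the status word
                              * with 0x4100 keeps C3 and C0; the masked value equals X86_FP_C0
                              * only for NaN and infinity, which must throw below.
                              */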
4861                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
4862                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4863                         amd64_pop_reg (code, AMD64_RAX);
4864                         amd64_fstp (code, 0);
4865                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4866                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
4867                         break;
4868                 case OP_TLS_GET: {
4869                         code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
4870                         break;
4871                 }
4872                 case OP_MEMORY_BARRIER: {
4873                         /* Not needed on amd64 */
4874                         break;
4875                 }
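                     /*
                      * lock xadd exchanges the register with the memory operand and stores
                      * their sum in memory, so the register ends up holding the old value:
                      * OP_ATOMIC_ADD returns the old value, while OP_ATOMIC_ADD_NEW below
                      * adds sreg2 once more to yield the new value.
                      */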
4876                 case OP_ATOMIC_ADD_I4:
4877                 case OP_ATOMIC_ADD_I8: {
4878                         int dreg = ins->dreg;
4879                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;
4880
4881                         if (dreg == ins->inst_basereg)
4882                                 dreg = AMD64_R11;
4883                         
4884                         if (dreg != ins->sreg2)
4885                                 amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
4886
4887                         x86_prefix (code, X86_LOCK_PREFIX);
4888                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
4889
4890                         if (dreg != ins->dreg)
4891                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
4892
4893                         break;
4894                 }
4895                 case OP_ATOMIC_ADD_NEW_I4:
4896                 case OP_ATOMIC_ADD_NEW_I8: {
4897                         int dreg = ins->dreg;
4898                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_NEW_I4) ? 4 : 8;
4899
4900                         if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
4901                                 dreg = AMD64_R11;
4902
4903                         amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
4904                         amd64_prefix (code, X86_LOCK_PREFIX);
4905                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
4906                         /* dreg contains the old value, add with sreg2 value */
4907                         amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);
4908                         
4909                         if (ins->dreg != dreg)
4910                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
4911
4912                         break;
4913                 }
4914                 case OP_ATOMIC_EXCHANGE_I4:
4915                 case OP_ATOMIC_EXCHANGE_I8: {
4916                         guchar *br[2];
4917                         int sreg2 = ins->sreg2;
4918                         int breg = ins->inst_basereg;
4919                         guint32 size;
4920                         gboolean need_push = FALSE, rdx_pushed = FALSE;
4921
4922                         if (ins->opcode == OP_ATOMIC_EXCHANGE_I8)
4923                                 size = 8;
4924                         else
4925                                 size = 4;
4926
4927                         /* 
4928                          * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
4929                          * an explanation of how this works.
4930                          */
4931
4932                         /* cmpxchg uses eax as the comparand; we need to make sure we can use it.
4933                          * This is a hack to overcome limits in the x86 reg allocator
4934                          * (req: dreg == eax and sreg2 != eax and breg != eax)
4935                          */
4936                         g_assert (ins->dreg == AMD64_RAX);
4937
4938                         if (breg == AMD64_RAX && ins->sreg2 == AMD64_RAX)
4939                                 /* Highly unlikely, but possible */
4940                                 need_push = TRUE;
4941
4942                         /* The pushes invalidate rsp */
4943                         if ((breg == AMD64_RAX) || need_push) {
4944                                 amd64_mov_reg_reg (code, AMD64_R11, breg, 8);
4945                                 breg = AMD64_R11;
4946                         }
4947
4948                         /* We need the EAX reg for the comparand */
4949                         if (ins->sreg2 == AMD64_RAX) {
4950                                 if (breg != AMD64_R11) {
4951                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
4952                                         sreg2 = AMD64_R11;
4953                                 } else {
4954                                         g_assert (need_push);
4955                                         amd64_push_reg (code, AMD64_RDX);
4956                                         amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
4957                                         sreg2 = AMD64_RDX;
4958                                         rdx_pushed = TRUE;
4959                                 }
4960                         }
4961
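                             /*
                              * Exchange loop: load the current value into RAX, then try to
                              * store sreg2 with a locked cmpxchg. On failure cmpxchg itself
                              * reloads RAX with the current value, so we can jump straight
                              * back to the cmpxchg.
                              */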
4962                         amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
4963
4964                         br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
4965                         amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
4966                         br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
4967                         amd64_patch (br [1], br [0]);
4968
4969                         if (rdx_pushed)
4970                                 amd64_pop_reg (code, AMD64_RDX);
4971
4972                         break;
4973                 }
4974                 case OP_ATOMIC_CAS_I4:
4975                 case OP_ATOMIC_CAS_I8: {
4976                         guint32 size;
4977
4978                         if (ins->opcode == OP_ATOMIC_CAS_I8)
4979                                 size = 8;
4980                         else
4981                                 size = 4;
4982
4983                         /* 
4984                          * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
4985                          * an explanation of how this works.
4986                          */
4987                         g_assert (ins->sreg3 == AMD64_RAX);
4988                         g_assert (ins->sreg1 != AMD64_RAX);
4989                         g_assert (ins->sreg1 != ins->sreg2);
4990
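                             /*
                              * lock cmpxchg compares RAX (the comparand, sreg3) with the memory
                              * operand: on a match sreg2 is stored, otherwise RAX is loaded with
                              * the current value. Either way RAX holds the old value afterwards,
                              * which becomes the result.
                              */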
4991                         amd64_prefix (code, X86_LOCK_PREFIX);
4992                         amd64_cmpxchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);
4993
4994                         if (ins->dreg != AMD64_RAX)
4995                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
4996                         break;
4997                 }
4998 #ifdef MONO_ARCH_SIMD_INTRINSICS
4999                 /* TODO: Some of these IR opcodes are marked as no-clobber when they in fact do clobber. */
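                     /*
                      * Most of the two-operand SSE opcodes below write their result into
                      * sreg1; the register allocator is expected to have constrained
                      * dreg == sreg1 for them, so dreg is not referenced explicitly.
                      */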
5000                 case OP_ADDPS:
5001                         amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
5002                         break;
5003                 case OP_DIVPS:
5004                         amd64_sse_divps_reg_reg (code, ins->sreg1, ins->sreg2);
5005                         break;
5006                 case OP_MULPS:
5007                         amd64_sse_mulps_reg_reg (code, ins->sreg1, ins->sreg2);
5008                         break;
5009                 case OP_SUBPS:
5010                         amd64_sse_subps_reg_reg (code, ins->sreg1, ins->sreg2);
5011                         break;
5012                 case OP_MAXPS:
5013                         amd64_sse_maxps_reg_reg (code, ins->sreg1, ins->sreg2);
5014                         break;
5015                 case OP_MINPS:
5016                         amd64_sse_minps_reg_reg (code, ins->sreg1, ins->sreg2);
5017                         break;
5018                 case OP_COMPPS:
5019                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
5020                         amd64_sse_cmpps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5021                         break;
5022                 case OP_ANDPS:
5023                         amd64_sse_andps_reg_reg (code, ins->sreg1, ins->sreg2);
5024                         break;
5025                 case OP_ANDNPS:
5026                         amd64_sse_andnps_reg_reg (code, ins->sreg1, ins->sreg2);
5027                         break;
5028                 case OP_ORPS:
5029                         amd64_sse_orps_reg_reg (code, ins->sreg1, ins->sreg2);
5030                         break;
5031                 case OP_XORPS:
5032                         amd64_sse_xorps_reg_reg (code, ins->sreg1, ins->sreg2);
5033                         break;
5034                 case OP_SQRTPS:
5035                         amd64_sse_sqrtps_reg_reg (code, ins->dreg, ins->sreg1);
5036                         break;
5037                 case OP_RSQRTPS:
5038                         amd64_sse_rsqrtps_reg_reg (code, ins->dreg, ins->sreg1);
5039                         break;
5040                 case OP_RCPPS:
5041                         amd64_sse_rcpps_reg_reg (code, ins->dreg, ins->sreg1);
5042                         break;
5043                 case OP_ADDSUBPS:
5044                         amd64_sse_addsubps_reg_reg (code, ins->sreg1, ins->sreg2);
5045                         break;
5046                 case OP_HADDPS:
5047                         amd64_sse_haddps_reg_reg (code, ins->sreg1, ins->sreg2);
5048                         break;
5049                 case OP_HSUBPS:
5050                         amd64_sse_hsubps_reg_reg (code, ins->sreg1, ins->sreg2);
5051                         break;
5052                 case OP_DUPPS_HIGH:
5053                         amd64_sse_movshdup_reg_reg (code, ins->dreg, ins->sreg1);
5054                         break;
5055                 case OP_DUPPS_LOW:
5056                         amd64_sse_movsldup_reg_reg (code, ins->dreg, ins->sreg1);
5057                         break;
5058
5059                 case OP_PSHUFLEW_HIGH:
5060                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5061                         amd64_sse_pshufhw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5062                         break;
5063                 case OP_PSHUFLEW_LOW:
5064                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5065                         amd64_sse_pshuflw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5066                         break;
5067                 case OP_PSHUFLED:
5068                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5069                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5070                         break;
5071
5072                 case OP_ADDPD:
5073                         amd64_sse_addpd_reg_reg (code, ins->sreg1, ins->sreg2);
5074                         break;
5075                 case OP_DIVPD:
5076                         amd64_sse_divpd_reg_reg (code, ins->sreg1, ins->sreg2);
5077                         break;
5078                 case OP_MULPD:
5079                         amd64_sse_mulpd_reg_reg (code, ins->sreg1, ins->sreg2);
5080                         break;
5081                 case OP_SUBPD:
5082                         amd64_sse_subpd_reg_reg (code, ins->sreg1, ins->sreg2);
5083                         break;
5084                 case OP_MAXPD:
5085                         amd64_sse_maxpd_reg_reg (code, ins->sreg1, ins->sreg2);
5086                         break;
5087                 case OP_MINPD:
5088                         amd64_sse_minpd_reg_reg (code, ins->sreg1, ins->sreg2);
5089                         break;
5090                 case OP_COMPPD:
5091                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
5092                         amd64_sse_cmppd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5093                         break;
5094                 case OP_ANDPD:
5095                         amd64_sse_andpd_reg_reg (code, ins->sreg1, ins->sreg2);
5096                         break;
5097                 case OP_ANDNPD:
5098                         amd64_sse_andnpd_reg_reg (code, ins->sreg1, ins->sreg2);
5099                         break;
5100                 case OP_ORPD:
5101                         amd64_sse_orpd_reg_reg (code, ins->sreg1, ins->sreg2);
5102                         break;
5103                 case OP_XORPD:
5104                         amd64_sse_xorpd_reg_reg (code, ins->sreg1, ins->sreg2);
5105                         break;
5106                 case OP_SQRTPD:
5107                         amd64_sse_sqrtpd_reg_reg (code, ins->dreg, ins->sreg1);
5108                         break;
5109                 case OP_ADDSUBPD:
5110                         amd64_sse_addsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
5111                         break;
5112                 case OP_HADDPD:
5113                         amd64_sse_haddpd_reg_reg (code, ins->sreg1, ins->sreg2);
5114                         break;
5115                 case OP_HSUBPD:
5116                         amd64_sse_hsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
5117                         break;
5118                 case OP_DUPPD:
5119                         amd64_sse_movddup_reg_reg (code, ins->dreg, ins->sreg1);
5120                         break;
5121
5122                 case OP_EXTRACT_MASK:
5123                         amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
5124                         break;
5125
5126                 case OP_PAND:
5127                         amd64_sse_pand_reg_reg (code, ins->sreg1, ins->sreg2);
5128                         break;
5129                 case OP_POR:
5130                         amd64_sse_por_reg_reg (code, ins->sreg1, ins->sreg2);
5131                         break;
5132                 case OP_PXOR:
5133                         amd64_sse_pxor_reg_reg (code, ins->sreg1, ins->sreg2);
5134                         break;
5135
5136                 case OP_PADDB:
5137                         amd64_sse_paddb_reg_reg (code, ins->sreg1, ins->sreg2);
5138                         break;
5139                 case OP_PADDW:
5140                         amd64_sse_paddw_reg_reg (code, ins->sreg1, ins->sreg2);
5141                         break;
5142                 case OP_PADDD:
5143                         amd64_sse_paddd_reg_reg (code, ins->sreg1, ins->sreg2);
5144                         break;
5145                 case OP_PADDQ:
5146                         amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
5147                         break;
5148
5149                 case OP_PSUBB:
5150                         amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
5151                         break;
5152                 case OP_PSUBW:
5153                         amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
5154                         break;
5155                 case OP_PSUBD:
5156                         amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
5157                         break;
5158                 case OP_PSUBQ:
5159                         amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
5160                         break;
5161
5162                 case OP_PMAXB_UN:
5163                         amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
5164                         break;
5165                 case OP_PMAXW_UN:
5166                         amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
5167                         break;
5168                 case OP_PMAXD_UN:
5169                         amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
5170                         break;
5171                 
5172                 case OP_PMAXB:
5173                         amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
5174                         break;
5175                 case OP_PMAXW:
5176                         amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
5177                         break;
5178                 case OP_PMAXD:
5179                         amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
5180                         break;
5181
5182                 case OP_PAVGB_UN:
5183                         amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
5184                         break;
5185                 case OP_PAVGW_UN:
5186                         amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
5187                         break;
5188
5189                 case OP_PMINB_UN:
5190                         amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
5191                         break;
5192                 case OP_PMINW_UN:
5193                         amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
5194                         break;
5195                 case OP_PMIND_UN:
5196                         amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
5197                         break;
5198
5199                 case OP_PMINB:
5200                         amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
5201                         break;
5202                 case OP_PMINW:
5203                         amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
5204                         break;
5205                 case OP_PMIND:
5206                         amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
5207                         break;
5208
5209                 case OP_PCMPEQB:
5210                         amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
5211                         break;
5212                 case OP_PCMPEQW:
5213                         amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
5214                         break;
5215                 case OP_PCMPEQD:
5216                         amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
5217                         break;
5218                 case OP_PCMPEQQ:
5219                         amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
5220                         break;
5221
5222                 case OP_PCMPGTB:
5223                         amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
5224                         break;
5225                 case OP_PCMPGTW:
5226                         amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
5227                         break;
5228                 case OP_PCMPGTD:
5229                         amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
5230                         break;
5231                 case OP_PCMPGTQ:
5232                         amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
5233                         break;
5234
5235                 case OP_PSUM_ABS_DIFF:
5236                         amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
5237                         break;
5238
5239                 case OP_UNPACK_LOWB:
5240                         amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
5241                         break;
5242                 case OP_UNPACK_LOWW:
5243                         amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
5244                         break;
5245                 case OP_UNPACK_LOWD:
5246                         amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
5247                         break;
5248                 case OP_UNPACK_LOWQ:
5249                         amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
5250                         break;
5251                 case OP_UNPACK_LOWPS:
5252                         amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
5253                         break;
5254                 case OP_UNPACK_LOWPD:
5255                         amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
5256                         break;
5257
5258                 case OP_UNPACK_HIGHB:
5259                         amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
5260                         break;
5261                 case OP_UNPACK_HIGHW:
5262                         amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
5263                         break;
5264                 case OP_UNPACK_HIGHD:
5265                         amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
5266                         break;
5267                 case OP_UNPACK_HIGHQ:
5268                         amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
5269                         break;
5270                 case OP_UNPACK_HIGHPS:
5271                         amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
5272                         break;
5273                 case OP_UNPACK_HIGHPD:
5274                         amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
5275                         break;
5276
5277                 case OP_PACKW:
5278                         amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
5279                         break;
5280                 case OP_PACKD:
5281                         amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
5282                         break;
5283                 case OP_PACKW_UN:
5284                         amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
5285                         break;
5286                 case OP_PACKD_UN:
5287                         amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
5288                         break;
5289
5290                 case OP_PADDB_SAT_UN:
5291                         amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
5292                         break;
5293                 case OP_PSUBB_SAT_UN:
5294                         amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
5295                         break;
5296                 case OP_PADDW_SAT_UN:
5297                         amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
5298                         break;
5299                 case OP_PSUBW_SAT_UN:
5300                         amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
5301                         break;
5302
5303                 case OP_PADDB_SAT:
5304                         amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
5305                         break;
5306                 case OP_PSUBB_SAT:
5307                         amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
5308                         break;
5309                 case OP_PADDW_SAT:
5310                         amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
5311                         break;
5312                 case OP_PSUBW_SAT:
5313                         amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
5314                         break;
5315                         
5316                 case OP_PMULW:
5317                         amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
5318                         break;
5319                 case OP_PMULD:
5320                         amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
5321                         break;
5322                 case OP_PMULQ:
5323                         amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
5324                         break;
5325                 case OP_PMULW_HIGH_UN:
5326                         amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
5327                         break;
5328                 case OP_PMULW_HIGH:
5329                         amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
5330                         break;
5331
5332                 case OP_PSHRW:
5333                         amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
5334                         break;
5335                 case OP_PSHRW_REG:
5336                         amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
5337                         break;
5338
5339                 case OP_PSARW:
5340                         amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
5341                         break;
5342                 case OP_PSARW_REG:
5343                         amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
5344                         break;
5345
5346                 case OP_PSHLW:
5347                         amd64_sse_psllw_reg_imm (code, ins->dreg, ins->inst_imm);
5348                         break;
5349                 case OP_PSHLW_REG:
5350                         amd64_sse_psllw_reg_reg (code, ins->dreg, ins->sreg2);
5351                         break;
5352
5353                 case OP_PSHRD:
5354                         amd64_sse_psrld_reg_imm (code, ins->dreg, ins->inst_imm);
5355                         break;
5356                 case OP_PSHRD_REG:
5357                         amd64_sse_psrld_reg_reg (code, ins->dreg, ins->sreg2);
5358                         break;
5359
5360                 case OP_PSARD:
5361                         amd64_sse_psrad_reg_imm (code, ins->dreg, ins->inst_imm);
5362                         break;
5363                 case OP_PSARD_REG:
5364                         amd64_sse_psrad_reg_reg (code, ins->dreg, ins->sreg2);
5365                         break;
5366
5367                 case OP_PSHLD:
5368                         amd64_sse_pslld_reg_imm (code, ins->dreg, ins->inst_imm);
5369                         break;
5370                 case OP_PSHLD_REG:
5371                         amd64_sse_pslld_reg_reg (code, ins->dreg, ins->sreg2);
5372                         break;
5373
5374                 case OP_PSHRQ:
5375                         amd64_sse_psrlq_reg_imm (code, ins->dreg, ins->inst_imm);
5376                         break;
5377                 case OP_PSHRQ_REG:
5378                         amd64_sse_psrlq_reg_reg (code, ins->dreg, ins->sreg2);
5379                         break;
5380                 
5381                 /* TODO: there is no psraq in the SSE spec; a 64-bit arithmetic shift right would have to be emulated:
5382                 case OP_PSARQ:
5383                         amd64_sse_psraq_reg_imm (code, ins->dreg, ins->inst_imm);
5384                         break;
5385                 case OP_PSARQ_REG:
5386                         amd64_sse_psraq_reg_reg (code, ins->dreg, ins->sreg2);
5387                         break;
5388                 */
5389         
5390                 case OP_PSHLQ:
5391                         amd64_sse_psllq_reg_imm (code, ins->dreg, ins->inst_imm);
5392                         break;
5393                 case OP_PSHLQ_REG:
5394                         amd64_sse_psllq_reg_reg (code, ins->dreg, ins->sreg2);
5395                         break;  
5396
5397                 case OP_ICONV_TO_X:
5398                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
5399                         break;
5400                 case OP_EXTRACT_I4:
5401                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
5402                         break;
5403                 case OP_EXTRACT_I8:
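                             /* inst_c0 selects the qword lane; movhlps copies the high qword
                              * of sreg1 into the low half of the XMM15 scratch reg first */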
5404                         if (ins->inst_c0) {
5405                                 amd64_movhlps_reg_reg (code, AMD64_XMM15, ins->sreg1);
5406                                 amd64_movd_reg_xreg_size (code, ins->dreg, AMD64_XMM15, 8);
5407                         } else {
5408                                 amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
5409                         }
5410                         break;
5411                 case OP_EXTRACT_I1:
5412                 case OP_EXTRACT_U1:
5413                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
5414                         if (ins->inst_c0)
5415                                 amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
5416                         amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
5417                         break;
5418                 case OP_EXTRACT_I2:
5419                 case OP_EXTRACT_U2:
5420                         /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
5421                         if (ins->inst_c0)
5422                                 amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
5423                         amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5424                         amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4);
5425                         break;
5426                 case OP_EXTRACT_R8:
5427                         if (ins->inst_c0)
5428                                 amd64_movhlps_reg_reg (code, ins->dreg, ins->sreg1);
5429                         else
5430                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
5431                         break;
5432                 case OP_INSERT_I2:
5433                         amd64_sse_pinsrw_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5434                         break;
5435                 case OP_EXTRACTX_U2:
5436                         amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5437                         break;
5438                 case OP_INSERTX_U1_SLOW:
5439                         /* sreg1 is the extracted ireg (scratch)
5440                          * sreg2 is the ireg to be inserted (scratch)
5441                          * dreg is the xreg to receive the value */
5442
5443                         /*clear the bits from the extracted word*/
5444                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
5445                         /*shift the value to insert if needed*/
5446                         if (ins->inst_c0 & 1)
5447                                 amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4);
5448                         /*join them together*/
5449                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
5450                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
5451                         break;
5452                 case OP_INSERTX_I4_SLOW:
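                             /* insert the 32-bit value as two 16-bit halves: the low word
                              * first, then the high word after shifting sreg2 right by 16 */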
5453                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
5454                         amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
5455                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
5456                         break;
5457                 case OP_INSERTX_I8_SLOW:
5458                         amd64_movd_xreg_reg_size(code, AMD64_XMM15, ins->sreg2, 8);
5459                         if (ins->inst_c0)
5460                                 amd64_movlhps_reg_reg (code, ins->dreg, AMD64_XMM15);
5461                         else
5462                                 amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM15);
5463                         break;
5464
5465                 case OP_INSERTX_R4_SLOW:
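                             /*
                              * cvtsd2ss only writes lane 0, so for lanes 1-3 the target lane
                              * is shuffled into position 0, converted, then shuffled back
                              * using the same (self-inverse) pshufd mask.
                              */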
5466                         switch (ins->inst_c0) {
5467                         case 0:
5468                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
5469                                 break;
5470                         case 1:
5471                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
5472                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
5473                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
5474                                 break;
5475                         case 2:
5476                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
5477                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
5478                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
5479                                 break;
5480                         case 3:
5481                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
5482                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
5483                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
5484                                 break;
5485                         }
5486                         break;
5487                 case OP_INSERTX_R8_SLOW:
5488                         if (ins->inst_c0)
5489                                 amd64_movlhps_reg_reg (code, ins->dreg, ins->sreg2);
5490                         else
5491                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg2);
5492                         break;
5493                 case OP_STOREX_MEMBASE_REG:
5494                 case OP_STOREX_MEMBASE:
5495                         amd64_sse_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
5496                         break;
5497                 case OP_LOADX_MEMBASE:
5498                         amd64_sse_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
5499                         break;
5500                 case OP_LOADX_ALIGNED_MEMBASE:
5501                         amd64_sse_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
5502                         break;
5503                 case OP_STOREX_ALIGNED_MEMBASE_REG:
5504                         amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
5505                         break;
5506                 case OP_STOREX_NTA_MEMBASE_REG:
5507                         amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
5508                         break;
5509                 case OP_PREFETCH_MEMBASE:
5510                         amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
5511                         break;
5512
5513                 case OP_XMOVE:
5514                         /*FIXME the peephole pass should have killed this*/
5515                         if (ins->dreg != ins->sreg1)
5516                                 amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1);
5517                         break;          
5518                 case OP_XZERO:
5519                         amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
5520                         break;
5521                 case OP_ICONV_TO_R8_RAW:
5522                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
5523                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5524                         break;
5525
5526                 case OP_FCONV_TO_R8_X:
5527                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
5528                         break;
5529
5530                 case OP_XCONV_R8_TO_I4:
5531                         amd64_sse_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
5532                         switch (ins->backend.source_opcode) {
5533                         case OP_FCONV_TO_I1:
5534                                 amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
5535                                 break;
5536                         case OP_FCONV_TO_U1:
5537                                 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
5538                                 break;
5539                         case OP_FCONV_TO_I2:
5540                                 amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
5541                                 break;
5542                         case OP_FCONV_TO_U2:
5543                                 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
5544                                 break;
5545                         }                       
5546                         break;
5547
5548                 case OP_EXPAND_I2:
5549                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 0);
5550                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 1);
5551                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
5552                         break;
5553                 case OP_EXPAND_I4:
5554                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
5555                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
5556                         break;
5557                 case OP_EXPAND_I8:
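                             /* pshufd mask 0x44 selects dwords (0,1,0,1), duplicating the
                              * low qword into the high half */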
5558                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
5559                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
5560                         break;
5561                 case OP_EXPAND_R4:
5562                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
5563                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->dreg);
5564                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
5565                         break;
5566                 case OP_EXPAND_R8:
5567                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
5568                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
5569                         break;
5570 #endif
5571                 case OP_LIVERANGE_START: {
5572                         if (cfg->verbose_level > 1)
5573                                 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
5574                         MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
5575                         break;
5576                 }
5577                 case OP_LIVERANGE_END: {
5578                         if (cfg->verbose_level > 1)
5579                                 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
5580                         MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
5581                         break;
5582                 }
5583                 default:
5584                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
5585                         g_assert_not_reached ();
5586                 }
5587
5588                 if ((code - cfg->native_code - offset) > max_len) {
5589                         g_warning ("max instruction length exceeded for instruction %s (expected %d, got %ld)",
5590                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
5591                         g_assert_not_reached ();
5592                 }
5593                
5594                 last_ins = ins;
5595                 last_offset = offset;
5596         }
5597
5598         cfg->code_len = code - cfg->native_code;
5599 }
5600
5601 #endif /* DISABLE_JIT */
5602
5603 void
5604 mono_arch_register_lowlevel_calls (void)
5605 {
5606         /* The signature doesn't matter */
5607         mono_register_jit_icall (mono_amd64_throw_exception, "mono_amd64_throw_exception", mono_create_icall_signature ("void"), TRUE);
5608 }
5609
5610 void
5611 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
5612 {
5613         MonoJumpInfo *patch_info;
5614         gboolean compile_aot = !run_cctors;
5615
5616         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
5617                 unsigned char *ip = patch_info->ip.i + code;
5618                 unsigned char *target;
5619
5620                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
5621
5622                 if (compile_aot) {
5623                         switch (patch_info->type) {
5624                         case MONO_PATCH_INFO_BB:
5625                         case MONO_PATCH_INFO_LABEL:
5626                                 break;
5627                         default:
5628                                 /* No need to patch these */
5629                                 continue;
5630                         }
5631                 }
5632
5633                 switch (patch_info->type) {
5634                 case MONO_PATCH_INFO_NONE:
5635                         continue;
5636                 case MONO_PATCH_INFO_METHOD_REL:
5637                 case MONO_PATCH_INFO_R8:
5638                 case MONO_PATCH_INFO_R4:
5639                         g_assert_not_reached ();
5640                         continue;
5641                 case MONO_PATCH_INFO_BB:
5642                         break;
5643                 default:
5644                         break;
5645                 }
5646
5647                 /* 
5648                  * Debug code to help track down problems where the target of a near call
5649                  * is not valid (near calls use a signed 32-bit displacement, so the target must be within +-2GB).
5650                  */
5651                 if (amd64_is_near_call (ip)) {
5652                         gint64 disp = (guint8*)target - (guint8*)ip;
5653
5654                         if (!amd64_is_imm32 (disp)) {
5655                                 printf ("TYPE: %d\n", patch_info->type);
5656                                 switch (patch_info->type) {
5657                                 case MONO_PATCH_INFO_INTERNAL_METHOD:
5658                                         printf ("V: %s\n", patch_info->data.name);
5659                                         break;
5660                                 case MONO_PATCH_INFO_METHOD_JUMP:
5661                                 case MONO_PATCH_INFO_METHOD:
5662                                         printf ("V: %s\n", patch_info->data.method->name);
5663                                         break;
5664                                 default:
5665                                         break;
5666                                 }
5667                         }
5668                 }
5669
5670                 amd64_patch (ip, (gpointer)target);
5671         }
5672 }
5673
5674 #ifndef DISABLE_JIT
5675
5676 static int
5677 get_max_epilog_size (MonoCompile *cfg)
5678 {
5679         int max_epilog_size = 16;
5680         
5681         if (cfg->method->save_lmf)
5682                 max_epilog_size += 256;
5683         
5684         if (mono_jit_trace_calls != NULL)
5685                 max_epilog_size += 50;
5686
5687         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
5688                 max_epilog_size += 50;
5689
5690         max_epilog_size += (AMD64_NREG * 2);
5691
5692         return max_epilog_size;
5693 }
5694
5695 /*
5696  * This macro is used for testing whether the unwinder works correctly at every point
5697  * where an async exception can happen.
5698  */
5699 /* This will generate a SIGSEGV at the given point in the code */
5700 #define async_exc_point(code) do { \
5701     if (mono_inject_async_exc_method && mono_method_desc_full_match (mono_inject_async_exc_method, cfg->method)) { \
5702          if (cfg->arch.async_point_count == mono_inject_async_exc_pos) \
5703              amd64_mov_reg_mem (code, AMD64_RAX, 0, 4); \
5704          cfg->arch.async_point_count ++; \
5705     } \
5706 } while (0)
5707
5708 guint8 *
5709 mono_arch_emit_prolog (MonoCompile *cfg)
5710 {
5711         MonoMethod *method = cfg->method;
5712         MonoBasicBlock *bb;
5713         MonoMethodSignature *sig;
5714         MonoInst *ins;
5715         int alloc_size, pos, i, cfa_offset, quad, max_epilog_size;
5716         guint8 *code;
5717         CallInfo *cinfo;
5718         gint32 lmf_offset = cfg->arch.lmf_offset;
5719         gboolean args_clobbered = FALSE;
5720         gboolean trace = FALSE;
5721
5722         cfg->code_size =  MAX (cfg->header->code_size * 4, 10240);
5723
5724         code = cfg->native_code = g_malloc (cfg->code_size);
5725
5726         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
5727                 trace = TRUE;
5728
5729         /* Amount of stack space allocated by register saving code */
5730         pos = 0;
5731
5732         /* Offset between RSP and the CFA */
5733         cfa_offset = 0;
5734
5735         /* 
5736          * The prolog consists of the following parts:
5737          * FP present:
5738          * - push rbp, mov rbp, rsp
5739          * - save callee saved regs using pushes
5740          * - allocate frame
5741          * - save rgctx if needed
5742          * - save lmf if needed
5743          * FP not present:
5744          * - allocate frame
5745          * - save rgctx if needed
5746          * - save lmf if needed
5747          * - save callee saved regs using moves
5748          */
5749
5750         // CFA = sp + 8
5751         cfa_offset = 8;
5752         mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
5753         // IP saved at CFA - 8
5754         mono_emit_unwind_op_offset (cfg, code, AMD64_RIP, -cfa_offset);
5755         async_exc_point (code);
5756
5757         if (!cfg->arch.omit_fp) {
5758                 amd64_push_reg (code, AMD64_RBP);
5759                 cfa_offset += 8;
5760                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
5761                 mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - cfa_offset);
5762                 async_exc_point (code);
5763 #ifdef HOST_WIN32
5764                 mono_arch_unwindinfo_add_push_nonvol (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP);
5765 #endif
5766                 
5767                 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
5768                 mono_emit_unwind_op_def_cfa_reg (cfg, code, AMD64_RBP);
5769                 async_exc_point (code);
5770 #ifdef HOST_WIN32
5771                 mono_arch_unwindinfo_add_set_fpreg (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP);
5772 #endif
5773         }
5774
5775         /* Save callee saved registers */
5776         if (!cfg->arch.omit_fp && !method->save_lmf) {
5777                 int offset = cfa_offset;
5778
5779                 for (i = 0; i < AMD64_NREG; ++i)
5780                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
5781                                 amd64_push_reg (code, i);
5782                                 pos += sizeof (gpointer);
5783                                 offset += 8;
5784                                 mono_emit_unwind_op_offset (cfg, code, i, - offset);
5785                                 async_exc_point (code);
5786                         }
5787         }
5788
5789         /* The param area is always at offset 0 from sp */
5790         /* This needs to be allocated here, since it has to come after the spill area */
5791         if (cfg->arch.no_pushes && cfg->param_area) {
5792                 if (cfg->arch.omit_fp)
5793                         // FIXME:
5794                         g_assert_not_reached ();
5795                 cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof (gpointer));
5796         }
5797
5798         if (cfg->arch.omit_fp) {
5799                 /* 
5800                  * On entry, the stack is misaligned by the pushing of the return
5801                  * address (RSP % 16 == 8). It is realigned either by the push of %rbp,
5802                  * or, with no frame pointer, by making alloc_size an odd multiple of 8.
5803                  */
5804                 alloc_size = ALIGN_TO (cfg->stack_offset, 8);
5805                 if ((alloc_size % 16) == 0)
5806                         alloc_size += 8;
5807         } else {
5808                 alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
5809
5810                 alloc_size -= pos;
5811         }
5812
5813         cfg->arch.stack_alloc_size = alloc_size;
5814
5815         /* Allocate stack frame */
5816         if (alloc_size) {
5817                 /* See mono_emit_stack_alloc */
5818 #if defined(HOST_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
5819                 guint32 remaining_size = alloc_size;
5820                 /* FIXME: handle unbounded code expansion; we should use a loop in case of more than X iterations */
5821                 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 10; /*10 is the max size of amd64_alu_reg_imm + amd64_test_membase_reg*/
5822                 guint32 offset = code - cfg->native_code;
5823                 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
5824                         while (required_code_size >= (cfg->code_size - offset))
5825                                 cfg->code_size *= 2;
5826                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
5827                         code = cfg->native_code + offset;
5828                         mono_jit_stats.code_reallocs++;
5829                 }
5830
5831                 while (remaining_size >= 0x1000) {
5832                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
5833                         if (cfg->arch.omit_fp) {
5834                                 cfa_offset += 0x1000;
5835                                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
5836                         }
5837                         async_exc_point (code);
5838 #ifdef HOST_WIN32
5839                         if (cfg->arch.omit_fp) 
5840                                 mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, 0x1000);
5841 #endif
5842
5843                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
5844                         remaining_size -= 0x1000;
5845                 }
5846                 if (remaining_size) {
5847                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
5848                         if (cfg->arch.omit_fp) {
5849                                 cfa_offset += remaining_size;
5850                                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
5851                                 async_exc_point (code);
5852                         }
5853 #ifdef HOST_WIN32
5854                         if (cfg->arch.omit_fp) 
5855                                 mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, remaining_size);
5856 #endif
5857                 }
5858 #else
5859                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
5860                 if (cfg->arch.omit_fp) {
5861                         cfa_offset += alloc_size;
5862                         mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
5863                         async_exc_point (code);
5864                 }
5865 #endif
5866         }
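        /*
         * Note on the page-sized probes above: each 0x1000 byte sub is followed
         * by a touch of the new stack top so the guard page is hit in order;
         * skipping it with one large subtraction would fault at an address the
         * OS would not treat as stack growth. Per page the emitted pair is:
         *
         *   sub  $0x1000, %rsp
         *   test %rsp, (%rsp)
         */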
5867
5868         /* Stack alignment check */
5869 #if 0
5870         {
5871                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
5872                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
5873                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
5874                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
5875                 amd64_breakpoint (code);
5876         }
5877 #endif
5878
5879 #ifndef TARGET_WIN32
5880         if (mini_get_debug_options ()->init_stacks) {
5881                 /* Fill the stack frame with a dummy value to force deterministic behavior */
5882         
5883                 /* Save registers to the red zone */
5884                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDI, 8);
5885                 amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
5886
5887                 amd64_mov_reg_imm (code, AMD64_RAX, 0x2a2a2a2a2a2a2a2a);
5888                 amd64_mov_reg_imm (code, AMD64_RCX, alloc_size / 8);
5889                 amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RSP, 8);
5890
5891                 amd64_cld (code);
5892                 amd64_prefix (code, X86_REP_PREFIX);
5893                 amd64_stosl (code);
5894
5895                 amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RSP, -8, 8);
5896                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
5897         }
5898 #endif  
5899
5900         /* Save LMF */
5901         if (method->save_lmf) {
5902                 /* 
5903                  * The ip field is not set; the exception handling code will obtain it from the stack location pointed to by the sp field.
5904                  */
5905                 /* 
5906                  * sp is saved right before calls, but we need to save it here too so
5907                  * that async stack walks work.
5908                  */
5909                 amd64_mov_membase_reg (code, cfg->frame_reg, cfg->arch.lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
5910                 /* Skip method (only needed for trampoline LMF frames) */
5911                 /* Save callee saved regs */
5912                 for (i = 0; i < MONO_MAX_IREGS; ++i) {
5913                         int offset;
5914
5915                         switch (i) {
5916                         case AMD64_RBX: offset = G_STRUCT_OFFSET (MonoLMF, rbx); break;
5917                         case AMD64_RBP: offset = G_STRUCT_OFFSET (MonoLMF, rbp); break;
5918                         case AMD64_R12: offset = G_STRUCT_OFFSET (MonoLMF, r12); break;
5919                         case AMD64_R13: offset = G_STRUCT_OFFSET (MonoLMF, r13); break;
5920                         case AMD64_R14: offset = G_STRUCT_OFFSET (MonoLMF, r14); break;
5921                         case AMD64_R15: offset = G_STRUCT_OFFSET (MonoLMF, r15); break;
5922 #ifdef HOST_WIN32
5923                         case AMD64_RDI: offset = G_STRUCT_OFFSET (MonoLMF, rdi); break;
5924                         case AMD64_RSI: offset = G_STRUCT_OFFSET (MonoLMF, rsi); break;
5925 #endif
5926                         default:
5927                                 offset = -1;
5928                                 break;
5929                         }
5930
5931                         if (offset != -1) {
5932                                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + offset, i, 8);
5933                                 if (cfg->arch.omit_fp || (i != AMD64_RBP))
5934                                         mono_emit_unwind_op_offset (cfg, code, i, - (cfa_offset - (lmf_offset + offset)));
5935                         }
5936                 }
5937         }
5938
5939         /* Save callee saved registers */
5940         if (cfg->arch.omit_fp && !method->save_lmf) {
5941                 gint32 save_area_offset = cfg->arch.reg_save_area_offset;
5942
5943                 /* Save callee saved registers after sp is adjusted */
5944                 /* The registers are saved at the bottom of the frame */
5945                 /* FIXME: Optimize this so the regs are saved at the end of the frame in increasing order */
5946                 for (i = 0; i < AMD64_NREG; ++i)
5947                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
5948                                 amd64_mov_membase_reg (code, AMD64_RSP, save_area_offset, i, 8);
5949                                 mono_emit_unwind_op_offset (cfg, code, i, - (cfa_offset - save_area_offset));
5950                                 save_area_offset += 8;
5951                                 async_exc_point (code);
5952                         }
5953         }
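        /*
         * Worked example for the unwind info above: with cfa_offset == 0x88
         * (return address plus a 0x80 byte frame) and save_area_offset == 0x10,
         * the register is recorded at CFA - 0x78, i.e. 0x10 bytes above the
         * current %rsp, which matches the store just emitted.
         */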
5954
5955         /* store runtime generic context */
5956         if (cfg->rgctx_var) {
5957                 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
5958                                 (cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP));
5959
5960                 amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 8);
5961         }
5962
5963         /* compute max_length in order to use short forward jumps */
5964         max_epilog_size = get_max_epilog_size (cfg);
5965         if (cfg->opt & MONO_OPT_BRANCH) {
5966                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
5967                         MonoInst *ins;
5968                         int max_length = 0;
5969
5970                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
5971                                 max_length += 6;
5972                         /* max alignment for loops */
5973                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
5974                                 max_length += LOOP_ALIGNMENT;
5975
5976                         MONO_BB_FOR_EACH_INS (bb, ins) {
5977                                 max_length += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
5978                         }
5979
5980                         /* Take prolog and epilog instrumentation into account */
5981                         if (bb == cfg->bb_entry || bb == cfg->bb_exit)
5982                                 max_length += max_epilog_size;
5983                         
5984                         bb->max_length = max_length;
5985                 }
5986         }
5987
5988         sig = mono_method_signature (method);
5989         pos = 0;
5990
5991         cinfo = cfg->arch.cinfo;
5992
5993         if (sig->ret->type != MONO_TYPE_VOID) {
5994                 /* Save the vret address register to its stack slot */
5995                 if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
5996                         amd64_mov_membase_reg (code, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, cinfo->ret.reg, 8);
5997         }
5998
5999         /* Keep this in sync with emit_load_volatile_arguments */
6000         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
6001                 ArgInfo *ainfo = cinfo->args + i;
6002                 gint32 stack_offset;
6003                 MonoType *arg_type;
6004
6005                 ins = cfg->args [i];
6006
6007                 if ((ins->flags & MONO_INST_IS_DEAD) && !trace)
6008                         /* Unused arguments */
6009                         continue;
6010
6011                 if (sig->hasthis && (i == 0))
6012                         arg_type = &mono_defaults.object_class->byval_arg;
6013                 else
6014                         arg_type = sig->params [i - sig->hasthis];
6015
6016                 stack_offset = ainfo->offset + ARGS_OFFSET;
6017
6018                 if (cfg->globalra) {
6019                         /* All the other moves are done by the register allocator */
6020                         switch (ainfo->storage) {
6021                         case ArgInFloatSSEReg:
6022                                 amd64_sse_cvtss2sd_reg_reg (code, ainfo->reg, ainfo->reg);
6023                                 break;
6024                         case ArgValuetypeInReg:
6025                                 for (quad = 0; quad < 2; quad ++) {
6026                                         switch (ainfo->pair_storage [quad]) {
6027                                         case ArgInIReg:
6028                                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer));
6029                                                 break;
6030                                         case ArgInFloatSSEReg:
6031                                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
6032                                                 break;
6033                                         case ArgInDoubleSSEReg:
6034                                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
6035                                                 break;
6036                                         case ArgNone:
6037                                                 break;
6038                                         default:
6039                                                 g_assert_not_reached ();
6040                                         }
6041                                 }
6042                                 break;
6043                         default:
6044                                 break;
6045                         }
6046
6047                         continue;
6048                 }
6049
6050                 /* Save volatile arguments to the stack */
6051                 if (ins->opcode != OP_REGVAR) {
6052                         switch (ainfo->storage) {
6053                         case ArgInIReg: {
6054                                 guint32 size = 8;
6055
6056                                 /* FIXME: I1 etc */
6057                                 /*
6058                                 if (stack_offset & 0x1)
6059                                         size = 1;
6060                                 else if (stack_offset & 0x2)
6061                                         size = 2;
6062                                 else if (stack_offset & 0x4)
6063                                         size = 4;
6064                                 else
6065                                         size = 8;
6066                                 */
6067                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, size);
6068                                 break;
6069                         }
6070                         case ArgInFloatSSEReg:
6071                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
6072                                 break;
6073                         case ArgInDoubleSSEReg:
6074                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
6075                                 break;
6076                         case ArgValuetypeInReg:
6077                                 for (quad = 0; quad < 2; quad ++) {
6078                                         switch (ainfo->pair_storage [quad]) {
6079                                         case ArgInIReg:
6080                                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer));
6081                                                 break;
6082                                         case ArgInFloatSSEReg:
6083                                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
6084                                                 break;
6085                                         case ArgInDoubleSSEReg:
6086                                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
6087                                                 break;
6088                                         case ArgNone:
6089                                                 break;
6090                                         default:
6091                                                 g_assert_not_reached ();
6092                                         }
6093                                 }
6094                                 break;
6095                         case ArgValuetypeAddrInIReg:
6096                                 if (ainfo->pair_storage [0] == ArgInIReg)
6097                                         amd64_mov_membase_reg (code, ins->inst_left->inst_basereg, ins->inst_left->inst_offset, ainfo->pair_regs [0],  sizeof (gpointer));
6098                                 break;
6099                         default:
6100                                 break;
6101                         }
6102                 } else {
6103                         /* Argument allocated to (non-volatile) register */
6104                         switch (ainfo->storage) {
6105                         case ArgInIReg:
6106                                 amd64_mov_reg_reg (code, ins->dreg, ainfo->reg, 8);
6107                                 break;
6108                         case ArgOnStack:
6109                                 amd64_mov_reg_membase (code, ins->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
6110                                 break;
6111                         default:
6112                                 g_assert_not_reached ();
6113                         }
6114                 }
6115         }
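        /*
         * For example, on SysV targets the 'this' pointer of an instance method
         * arrives in %rdi (ArgInIReg); unless the argument is dead, the loop
         * above spills it to its frame slot so it remains addressable by stack
         * walks and the debugger once the register gets reused.
         */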
6116
6117         /* Might need to attach the thread to the JIT  or change the domain for the callback */
6118         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
6119                 guint64 domain = (guint64)cfg->domain;
6120
6121                 args_clobbered = TRUE;
6122
6123                 /* 
6124                  * The call might clobber argument registers, but they are already
6125                  * saved to the stack/global regs.
6126                  */
6127                 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
6128                         guint8 *buf, *no_domain_branch;
6129
6130                         code = mono_amd64_emit_tls_get (code, AMD64_RAX, appdomain_tls_offset);
6131                         if (cfg->compile_aot) {
6132                                 /* AOT code is only used in the root domain */
6133                                 amd64_mov_reg_imm (code, AMD64_ARG_REG1, 0);
6134                         } else {
6135                                 if ((domain >> 32) == 0)
6136                                         amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
6137                                 else
6138                                         amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
6139                         }
6140                         amd64_alu_reg_reg (code, X86_CMP, AMD64_RAX, AMD64_ARG_REG1);
6141                         no_domain_branch = code;
6142                         x86_branch8 (code, X86_CC_NE, 0, 0);
6143                         code = mono_amd64_emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset);
6144                         amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
6145                         buf = code;
6146                         x86_branch8 (code, X86_CC_NE, 0, 0);
6147                         amd64_patch (no_domain_branch, code);
6148                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
6149                                           (gpointer)"mono_jit_thread_attach", TRUE);
6150                         amd64_patch (buf, code);
6151 #ifdef HOST_WIN32
6152                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
6153                         /* FIXME: Add a separate key for LMF to avoid this */
6154                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
6155 #endif
6156                 } else {
6157                         g_assert (!cfg->compile_aot);
6158                         if (cfg->compile_aot) {
6159                                 /* AOT code is only used in the root domain */
6160                                 amd64_mov_reg_imm (code, AMD64_ARG_REG1, 0);
6161                         } else {
6162                                 if ((domain >> 32) == 0)
6163                                         amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
6164                                 else
6165                                         amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
6166                         }
6167                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
6168                                           (gpointer)"mono_jit_thread_attach", TRUE);
6169                 }
6170         }
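        /*
         * The fast path emitted above behaves like this sketch (in C terms,
         * with the same TLS slots as the code):
         *
         *   if (tls [appdomain_tls_offset] != domain)
         *           goto attach;
         *   if (tls [lmf_addr_tls_offset] != NULL)
         *           goto done;
         *   attach: mono_jit_thread_attach (domain);
         *   done:   ;
         */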
6171
6172         if (method->save_lmf) {
6173                 if ((lmf_tls_offset != -1) && !optimize_for_xen) {
6174                         /*
6175                          * Optimized version which uses the mono_lmf TLS variable instead of 
6176                          * indirection through the mono_lmf_addr TLS variable.
6177                          */
6178                         /* %rax = previous_lmf */
6179                         x86_prefix (code, X86_FS_PREFIX);
6180                         amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
6181
6182                         /* Save previous_lmf */
6183                         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_RAX, 8);
6184                         /* Set new lmf */
6185                         if (lmf_offset == 0) {
6186                                 x86_prefix (code, X86_FS_PREFIX);
6187                                 amd64_mov_mem_reg (code, lmf_tls_offset, cfg->frame_reg, 8);
6188                         } else {
6189                                 amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset);
6190                                 x86_prefix (code, X86_FS_PREFIX);
6191                                 amd64_mov_mem_reg (code, lmf_tls_offset, AMD64_R11, 8);
6192                         }
6193                 } else {
6194                         if (lmf_addr_tls_offset != -1) {
6195                                 /* Load the lmf addr quickly using the FS register */
6196                                 code = mono_amd64_emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset);
6197 #ifdef HOST_WIN32
6198                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
6199                                 /* FIXME: Add a separate key for LMF to avoid this */
6200                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
6201 #endif
6202                         }
6203                         else {
6204                                 /* 
6205                                  * The call might clobber argument registers, but they are already
6206                                  * saved to the stack/global regs.
6207                                  */
6208                                 args_clobbered = TRUE;
6209                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
6210                                                                   (gpointer)"mono_get_lmf_addr", TRUE);         
6211                         }
6212
6213                         /* Save lmf_addr */
6214                         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
6215                         /* Save previous_lmf */
6216                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
6217                         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
6218                         /* Set new lmf */
6219                         amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset);
6220                         amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
6221                 }
6222         }
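        /*
         * In C terms the code emitted above pushes the new LMF onto the
         * per-thread list (sketch; the fast path keeps the list head directly
         * in the mono_lmf TLS slot):
         *
         *   lmf->lmf_addr = lmf_addr;        (slow path only)
         *   lmf->previous_lmf = *lmf_addr;
         *   *lmf_addr = lmf;
         */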
6223
6224         if (trace) {
6225                 args_clobbered = TRUE;
6226                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
6227         }
6228
6229         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
6230                 args_clobbered = TRUE;
6231
6232         /*
6233          * Optimize the common case of the first bblock making a call with the same
6234          * arguments as the method. This works because the arguments are still in their
6235          * original argument registers.
6236          * FIXME: Generalize this
6237          */
6238         if (!args_clobbered) {
6239                 MonoBasicBlock *first_bb = cfg->bb_entry;
6240                 MonoInst *next;
6241
6242                 next = mono_bb_first_ins (first_bb);
6243                 if (!next && first_bb->next_bb) {
6244                         first_bb = first_bb->next_bb;
6245                         next = mono_bb_first_ins (first_bb);
6246                 }
6247
6248                 if (first_bb->in_count > 1)
6249                         next = NULL;
6250
6251                 for (i = 0; next && i < sig->param_count + sig->hasthis; ++i) {
6252                         ArgInfo *ainfo = cinfo->args + i;
6253                         gboolean match = FALSE;
6254                         
6255                         ins = cfg->args [i];
6256                         if (ins->opcode != OP_REGVAR) {
6257                                 switch (ainfo->storage) {
6258                                 case ArgInIReg: {
6259                                         if (((next->opcode == OP_LOAD_MEMBASE) || (next->opcode == OP_LOADI4_MEMBASE)) && next->inst_basereg == ins->inst_basereg && next->inst_offset == ins->inst_offset) {
6260                                                 if (next->dreg == ainfo->reg) {
6261                                                         NULLIFY_INS (next);
6262                                                         match = TRUE;
6263                                                 } else {
6264                                                         next->opcode = OP_MOVE;
6265                                                         next->sreg1 = ainfo->reg;
6266                                                         /* Only continue if the instruction doesn't change argument regs */
6267                                                         if (next->dreg == ainfo->reg || next->dreg == AMD64_RAX)
6268                                                                 match = TRUE;
6269                                                 }
6270                                         }
6271                                         break;
6272                                 }
6273                                 default:
6274                                         break;
6275                                 }
6276                         } else {
6277                                 /* Argument allocated to (non-volatile) register */
6278                                 switch (ainfo->storage) {
6279                                 case ArgInIReg:
6280                                         if (next->opcode == OP_MOVE && next->sreg1 == ins->dreg && next->dreg == ainfo->reg) {
6281                                                 NULLIFY_INS (next);
6282                                                 match = TRUE;
6283                                         }
6284                                         break;
6285                                 default:
6286                                         break;
6287                                 }
6288                         }
6289
6290                         if (match) {
6291                                 next = next->next;
6292                                 //next = mono_inst_list_next (&next->node, &first_bb->ins_list);
6293                                 if (!next)
6294                                         break;
6295                         }
6296                 }
6297         }
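        /*
         * Example of the optimization above: if the first instruction is an
         * OP_LOAD_MEMBASE whose dreg equals the register the argument arrived
         * in (say %rdi for the first integer argument on SysV) and whose source
         * is that argument's home slot, the load is nullified because the value
         * is still live in the register.
         */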
6298
6299         /* Initialize ss_trigger_page_var */
6300         if (cfg->arch.ss_trigger_page_var) {
6301                 MonoInst *var = cfg->arch.ss_trigger_page_var;
6302
6303                 g_assert (!cfg->compile_aot);
6304                 g_assert (var->opcode == OP_REGOFFSET);
6305
6306                 amd64_mov_reg_imm (code, AMD64_R11, (guint64)ss_trigger_page);
6307                 amd64_mov_membase_reg (code, var->inst_basereg, var->inst_offset, AMD64_R11, 8);
6308         }
6309
6310         cfg->code_len = code - cfg->native_code;
6311
6312         g_assert (cfg->code_len < cfg->code_size);
6313
6314         return code;
6315 }
6316
6317 void
6318 mono_arch_emit_epilog (MonoCompile *cfg)
6319 {
6320         MonoMethod *method = cfg->method;
6321         int quad, pos, i;
6322         guint8 *code;
6323         int max_epilog_size;
6324         CallInfo *cinfo;
6325         gint32 lmf_offset = cfg->arch.lmf_offset;
6326         
6327         max_epilog_size = get_max_epilog_size (cfg);
6328
6329         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
6330                 cfg->code_size *= 2;
6331                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
6332                 mono_jit_stats.code_reallocs++;
6333         }
6334
6335         code = cfg->native_code + cfg->code_len;
6336
6337         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
6338                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
6339
6340         /* the code restoring the registers must be kept in sync with OP_JMP */
6341         pos = 0;
6342         
6343         if (method->save_lmf) {
6344                 /* check if we need to restore protection of the stack after a stack overflow */
6345                 if (mono_get_jit_tls_offset () != -1) {
6346                         guint8 *patch;
6347                         code = mono_amd64_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
6348                         /* we load the value in a separate instruction: this mechanism may be
6349                          * used later as a safer way to do thread interruption
6350                          */
6351                         amd64_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 8);
6352                         x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
6353                         patch = code;
6354                         x86_branch8 (code, X86_CC_Z, 0, FALSE);
6355                         /* note that the call trampoline will preserve eax/edx */
6356                         x86_call_reg (code, X86_ECX);
6357                         x86_patch (patch, code);
6358                 } else {
6359                         /* FIXME: maybe save the jit tls in the prolog */
6360                 }
6361                 if ((lmf_tls_offset != -1) && !optimize_for_xen) {
6362                         /*
6363                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
6364                          * through the mono_lmf_addr TLS variable.
6365                          */
6366                         /* reg = previous_lmf */
6367                         amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
6368                         x86_prefix (code, X86_FS_PREFIX);
6369                         amd64_mov_mem_reg (code, lmf_tls_offset, AMD64_R11, 8);
6370                 } else {
6371                         /* Restore previous lmf */
6372                         amd64_mov_reg_membase (code, AMD64_RCX, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
6373                         amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
6374                         amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);
6375                 }
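                /*
                 * In C terms the unlink above is simply (sketch):
                 *
                 *   *lmf_addr = lmf->previous_lmf;
                 *
                 * i.e. the LMF pushed in the prolog is popped off the per-thread list.
                 */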
6376
6377                 /* Restore callee saved regs */
6378                 if (cfg->used_int_regs & (1 << AMD64_RBP)) {
6379                         amd64_mov_reg_membase (code, AMD64_RBP, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbp), 8);
6380                 }
6381                 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
6382                         amd64_mov_reg_membase (code, AMD64_RBX, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
6383                 }
6384                 if (cfg->used_int_regs & (1 << AMD64_R12)) {
6385                         amd64_mov_reg_membase (code, AMD64_R12, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
6386                 }
6387                 if (cfg->used_int_regs & (1 << AMD64_R13)) {
6388                         amd64_mov_reg_membase (code, AMD64_R13, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
6389                 }
6390                 if (cfg->used_int_regs & (1 << AMD64_R14)) {
6391                         amd64_mov_reg_membase (code, AMD64_R14, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
6392                 }
6393                 if (cfg->used_int_regs & (1 << AMD64_R15)) {
6394                         amd64_mov_reg_membase (code, AMD64_R15, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
6395                 }
6396 #ifdef HOST_WIN32
6397                 if (cfg->used_int_regs & (1 << AMD64_RDI)) {
6398                         amd64_mov_reg_membase (code, AMD64_RDI, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rdi), 8);
6399                 }
6400                 if (cfg->used_int_regs & (1 << AMD64_RSI)) {
6401                         amd64_mov_reg_membase (code, AMD64_RSI, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsi), 8);
6402                 }
6403 #endif
6404         } else {
6405
6406                 if (cfg->arch.omit_fp) {
6407                         gint32 save_area_offset = cfg->arch.reg_save_area_offset;
6408
6409                         for (i = 0; i < AMD64_NREG; ++i)
6410                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
6411                                         amd64_mov_reg_membase (code, i, AMD64_RSP, save_area_offset, 8);
6412                                         save_area_offset += 8;
6413                                 }
6414                 }
6415                 else {
6416                         for (i = 0; i < AMD64_NREG; ++i)
6417                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
6418                                         pos -= sizeof (gpointer);
6419
6420                         if (pos) {
6421                                 if (pos == - sizeof (gpointer)) {
6422                                         /* Only one register, so avoid lea */
6423                                         for (i = AMD64_NREG - 1; i > 0; --i)
6424                                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
6425                                                         amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
6426                                                 }
6427                                 }
6428                                 else {
6429                                         amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
6430
6431                                         /* Pop registers in reverse order */
6432                                         for (i = AMD64_NREG - 1; i > 0; --i)
6433                                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
6434                                                         amd64_pop_reg (code, i);
6435                                                 }
6436                                 }
6437                         }
6438                 }
6439         }
6440
6441         /* Load returned vtypes into registers if needed */
6442         cinfo = cfg->arch.cinfo;
6443         if (cinfo->ret.storage == ArgValuetypeInReg) {
6444                 ArgInfo *ainfo = &cinfo->ret;
6445                 MonoInst *inst = cfg->ret;
6446
6447                 for (quad = 0; quad < 2; quad ++) {
6448                         switch (ainfo->pair_storage [quad]) {
6449                         case ArgInIReg:
6450                                 amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer));
6451                                 break;
6452                         case ArgInFloatSSEReg:
6453                                 amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
6454                                 break;
6455                         case ArgInDoubleSSEReg:
6456                                 amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
6457                                 break;
6458                         case ArgNone:
6459                                 break;
6460                         default:
6461                                 g_assert_not_reached ();
6462                         }
6463                 }
6464         }
6465
6466         if (cfg->arch.omit_fp) {
6467                 if (cfg->arch.stack_alloc_size)
6468                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
6469         } else {
6470                 amd64_leave (code);
6471         }
6472         async_exc_point (code);
6473         amd64_ret (code);
6474
6475         cfg->code_len = code - cfg->native_code;
6476
6477         g_assert (cfg->code_len < cfg->code_size);
6478 }
6479
6480 void
6481 mono_arch_emit_exceptions (MonoCompile *cfg)
6482 {
6483         MonoJumpInfo *patch_info;
6484         int nthrows, i;
6485         guint8 *code;
6486         MonoClass *exc_classes [16];
6487         guint8 *exc_throw_start [16], *exc_throw_end [16];
6488         guint32 code_size = 0;
6489
6490         /* Compute needed space */
6491         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
6492                 if (patch_info->type == MONO_PATCH_INFO_EXC)
6493                         code_size += 40;
6494                 if (patch_info->type == MONO_PATCH_INFO_R8)
6495                         code_size += 8 + 15; /* sizeof (double) + alignment */
6496                 if (patch_info->type == MONO_PATCH_INFO_R4)
6497                         code_size += 4 + 15; /* sizeof (float) + alignment */
6498         }
6499
6500         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
6501                 cfg->code_size *= 2;
6502                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
6503                 mono_jit_stats.code_reallocs++;
6504         }
6505
6506         code = cfg->native_code + cfg->code_len;
6507
6508         /* add code to raise exceptions */
6509         nthrows = 0;
6510         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
6511                 switch (patch_info->type) {
6512                 case MONO_PATCH_INFO_EXC: {
6513                         MonoClass *exc_class;
6514                         guint8 *buf, *buf2;
6515                         guint32 throw_ip;
6516
6517                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
6518
6519                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
6520                         g_assert (exc_class);
6521                         throw_ip = patch_info->ip.i;
6522
6523                         //x86_breakpoint (code);
6524                         /* Find a throw sequence for the same exception class */
6525                         for (i = 0; i < nthrows; ++i)
6526                                 if (exc_classes [i] == exc_class)
6527                                         break;
6528                         if (i < nthrows) {
6529                                 amd64_mov_reg_imm (code, AMD64_ARG_REG2, (exc_throw_end [i] - cfg->native_code) - throw_ip);
6530                                 x86_jump_code (code, exc_throw_start [i]);
6531                                 patch_info->type = MONO_PATCH_INFO_NONE;
6532                         }
6533                         else {
6534                                 buf = code;
6535                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG2, 0xf0f0f0f0, 4);
6536                                 buf2 = code;
6537
6538                                 if (nthrows < 16) {
6539                                         exc_classes [nthrows] = exc_class;
6540                                         exc_throw_start [nthrows] = code;
6541                                 }
6542                                 amd64_mov_reg_imm (code, AMD64_ARG_REG1, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
6543
6544                                 patch_info->type = MONO_PATCH_INFO_NONE;
6545
6546                                 code = emit_call_body (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_arch_throw_corlib_exception");
6547
6548                                 amd64_mov_reg_imm (buf, AMD64_ARG_REG2, (code - cfg->native_code) - throw_ip);
6549                                 while (buf < buf2)
6550                                         x86_nop (buf);
6551
6552                                 if (nthrows < 16) {
6553                                         exc_throw_end [nthrows] = code;
6554                                         nthrows ++;
6555                                 }
6556                         }
6557                         break;
6558                 }
6559                 default:
6560                         /* do nothing */
6561                         break;
6562                 }
6563         }
6564
6565         /* Handle relocations with RIP relative addressing */
6566         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
6567                 gboolean remove = FALSE;
6568
6569                 switch (patch_info->type) {
6570                 case MONO_PATCH_INFO_R8:
6571                 case MONO_PATCH_INFO_R4: {
6572                         guint8 *pos;
6573
6574                         /* The SSE opcodes require a 16 byte alignment */
6575                         code = (guint8*)ALIGN_TO (code, 16);
6576
6577                         pos = cfg->native_code + patch_info->ip.i;
6578
6579                         if (IS_REX (pos [1]))
6580                                 *(guint32*)(pos + 5) = (guint8*)code - pos - 9;
6581                         else
6582                                 *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
6583
6584                         if (patch_info->type == MONO_PATCH_INFO_R8) {
6585                                 *(double*)code = *(double*)patch_info->data.target;
6586                                 code += sizeof (double);
6587                         } else {
6588                                 *(float*)code = *(float*)patch_info->data.target;
6589                                 code += sizeof (float);
6590                         }
6591
6592                         remove = TRUE;
6593                         break;
6594                 }
6595                 default:
6596                         break;
6597                 }
6598
6599                 if (remove) {
6600                         if (patch_info == cfg->patch_info)
6601                                 cfg->patch_info = patch_info->next;
6602                         else {
6603                                 MonoJumpInfo *tmp;
6604
6605                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
6606                                         ;
6607                                 tmp->next = patch_info->next;
6608                         }
6609                 }
6610         }
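        /*
         * Displacement math for the R8/R4 patches above (sketch): a movsd/movss
         * without a REX prefix is 8 bytes long with the 32-bit displacement at
         * pos + 4, and RIP-relative addressing is relative to the end of the
         * instruction, hence disp = code - pos - 8. A REX prefix shifts
         * everything by one byte, giving the pos + 5 / subtract 9 variant.
         */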
6611
6612         cfg->code_len = code - cfg->native_code;
6613
6614         g_assert (cfg->code_len < cfg->code_size);
6615
6616 }
6617
6618 #endif /* DISABLE_JIT */
6619
6620 void*
6621 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
6622 {
6623         guchar *code = p;
6624         CallInfo *cinfo = NULL;
6625         MonoMethodSignature *sig;
6626         MonoInst *inst;
6627         int i, n, stack_area = 0;
6628
6629         /* Keep this in sync with mono_arch_get_argument_info */
6630
6631         if (enable_arguments) {
6632                 /* Allocate a new area on the stack and save arguments there */
6633                 sig = mono_method_signature (cfg->method);
6634
6635                 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
6636
6637                 n = sig->param_count + sig->hasthis;
6638
6639                 stack_area = ALIGN_TO (n * 8, 16);
6640
6641                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
6642
6643                 for (i = 0; i < n; ++i) {
6644                         inst = cfg->args [i];
6645
6646                         if (inst->opcode == OP_REGVAR)
6647                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
6648                         else {
6649                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
6650                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
6651                         }
6652                 }
6653         }
6654
6655         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
6656         amd64_set_reg_template (code, AMD64_ARG_REG1);
6657         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RSP, 8);
6658         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
6659
6660         if (enable_arguments)
6661                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
6662
6663         return code;
6664 }
6665
6666 enum {
6667         SAVE_NONE,
6668         SAVE_STRUCT,
6669         SAVE_EAX,
6670         SAVE_EAX_EDX,
6671         SAVE_XMM
6672 };
6673
6674 void*
6675 mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
6676 {
6677         guchar *code = p;
6678         int save_mode = SAVE_NONE;
6679         MonoMethod *method = cfg->method;
6680         MonoType *ret_type = mini_type_get_underlying_type (NULL, mono_method_signature (method)->ret);
6681         
6682         switch (ret_type->type) {
6683         case MONO_TYPE_VOID:
6684                 /* special case string .ctor icall */
6685                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
6686                         save_mode = SAVE_EAX;
6687                 else
6688                         save_mode = SAVE_NONE;
6689                 break;
6690         case MONO_TYPE_I8:
6691         case MONO_TYPE_U8:
6692                 save_mode = SAVE_EAX;
6693                 break;
6694         case MONO_TYPE_R4:
6695         case MONO_TYPE_R8:
6696                 save_mode = SAVE_XMM;
6697                 break;
6698         case MONO_TYPE_GENERICINST:
6699                 if (!mono_type_generic_inst_is_valuetype (ret_type)) {
6700                         save_mode = SAVE_EAX;
6701                         break;
6702                 }
6703                 /* Fall through */
6704         case MONO_TYPE_VALUETYPE:
6705                 save_mode = SAVE_STRUCT;
6706                 break;
6707         default:
6708                 save_mode = SAVE_EAX;
6709                 break;
6710         }
6711
6712         /* Save the result and copy it into the proper argument register */
6713         switch (save_mode) {
6714         case SAVE_EAX:
6715                 amd64_push_reg (code, AMD64_RAX);
6716                 /* Align stack */
6717                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
6718                 if (enable_arguments)
6719                         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RAX, 8);
6720                 break;
6721         case SAVE_STRUCT:
6722                 /* FIXME: */
6723                 if (enable_arguments)
6724                         amd64_mov_reg_imm (code, AMD64_ARG_REG2, 0);
6725                 break;
6726         case SAVE_XMM:
6727                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
6728                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
6729                 /* Align stack */
6730                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
6731                 /* 
6732                  * The result is already in the proper argument register so no copying
6733                  * needed.
6734                  */
6735                 break;
6736         case SAVE_NONE:
6737                 break;
6738         default:
6739                 g_assert_not_reached ();
6740         }
6741
6742         /* Set %al since this is a varargs call */
6743         if (save_mode == SAVE_XMM)
6744                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
6745         else
6746                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
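        /*
         * The value in %al follows the SysV AMD64 varargs convention: it is an
         * upper bound on the number of vector registers used for arguments, so
         * 1 when XMM0 carries the return value and 0 otherwise.
         */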
6747
6748         if (preserve_argument_registers) {
6749                 amd64_push_reg (code, MONO_AMD64_ARG_REG1);
6750                 amd64_push_reg (code, MONO_AMD64_ARG_REG2);
6751         }
6752
6753         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
6754         amd64_set_reg_template (code, AMD64_ARG_REG1);
6755         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
6756
6757         if (preserve_argument_registers) {
6758                 amd64_pop_reg (code, MONO_AMD64_ARG_REG2);
6759                 amd64_pop_reg (code, MONO_AMD64_ARG_REG1);
6760         }
6761
6762         /* Restore result */
6763         switch (save_mode) {
6764         case SAVE_EAX:
6765                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
6766                 amd64_pop_reg (code, AMD64_RAX);
6767                 break;
6768         case SAVE_STRUCT:
6769                 /* FIXME: */
6770                 break;
6771         case SAVE_XMM:
6772                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
6773                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
6774                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
6775                 break;
6776         case SAVE_NONE:
6777                 break;
6778         default:
6779                 g_assert_not_reached ();
6780         }
6781
6782         return code;
6783 }
6784
6785 void
6786 mono_arch_flush_icache (guint8 *code, gint size)
6787 {
6788         /* Not needed */
6789 }
6790
6791 void
6792 mono_arch_flush_register_windows (void)
6793 {
6794 }
6795
6796 gboolean 
6797 mono_arch_is_inst_imm (gint64 imm)
6798 {
6799         return amd64_is_imm32 (imm);
6800 }
6801
6802 /*
6803  * Determine whether the trap whose info is in SIGINFO was caused by
6804  * integer overflow.
6805  */
6806 gboolean
6807 mono_arch_is_int_overflow (void *sigctx, void *info)
6808 {
6809         MonoContext ctx;
6810         guint8* rip;
6811         int reg;
6812         gint64 value;
6813
6814         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
6815
6816         rip = (guint8*)ctx.rip;
6817
6818         if (IS_REX (rip [0])) {
6819                 reg = amd64_rex_b (rip [0]);
6820                 rip ++;
6821         }
6822         else
6823                 reg = 0;
6824
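        /*
         * Example: the byte sequence 48 f7 f9 decodes as "idiv %rcx" -- 0xf7
         * with a modrm reg field of 7 selects idiv, and the rm field picks the
         * divisor register. If the divisor was -1, the fault must have been the
         * overflowing division INT64_MIN / -1 rather than a divide by zero.
         */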
6825         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
6826                 /* idiv REG */
6827                 reg += x86_modrm_rm (rip [1]);
6828
6829                 switch (reg) {
6830                 case AMD64_RAX:
6831                         value = ctx.rax;
6832                         break;
6833                 case AMD64_RBX:
6834                         value = ctx.rbx;
6835                         break;
6836                 case AMD64_RCX:
6837                         value = ctx.rcx;
6838                         break;
6839                 case AMD64_RDX:
6840                         value = ctx.rdx;
6841                         break;
6842                 case AMD64_RBP:
6843                         value = ctx.rbp;
6844                         break;
6845                 case AMD64_RSP:
6846                         value = ctx.rsp;
6847                         break;
6848                 case AMD64_RSI:
6849                         value = ctx.rsi;
6850                         break;
6851                 case AMD64_RDI:
6852                         value = ctx.rdi;
6853                         break;
6854                 case AMD64_R12:
6855                         value = ctx.r12;
6856                         break;
6857                 case AMD64_R13:
6858                         value = ctx.r13;
6859                         break;
6860                 case AMD64_R14:
6861                         value = ctx.r14;
6862                         break;
6863                 case AMD64_R15:
6864                         value = ctx.r15;
6865                         break;
6866                 default:
6867                         g_assert_not_reached ();
6868                         reg = -1;
6869                 }                       
6870
6871                 if (value == -1)
6872                         return TRUE;
6873         }
6874
6875         return FALSE;
6876 }
6877
6878 guint32
6879 mono_arch_get_patch_offset (guint8 *code)
6880 {
6881         return 3;
6882 }
6883
6884 /**
6885  * mono_breakpoint_clean_code:
6886  *
6887  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
6888  * breakpoints in the original code, they are removed in the copy.
6889  *
6890  * Returns TRUE if no sw breakpoint was present.
6891  */
6892 gboolean
6893 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
6894 {
6895         int i;
6896         gboolean can_write = TRUE;
6897         /*
6898          * If method_start is non-NULL we need to perform bound checks, since accessing
6899          * memory at code - offset could take us before the start of the method and into
6900          * a different page that is not mapped, or make us read incorrect data. We
6901          * zero-fill the out-of-range bytes instead.
6902          */
6903         if (!method_start || code - offset >= method_start) {
6904                 memcpy (buf, code - offset, size);
6905         } else {
6906                 int diff = code - method_start;
6907                 memset (buf, 0, size);
6908                 memcpy (buf + offset - diff, method_start, diff + size - offset);
6909         }
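        /*
         * Worked example of the bounds handling above: with offset == 9 but code
         * only 4 bytes past method_start, diff == 4, so the first 5 bytes of buf
         * stay zeroed and size - 5 bytes are copied starting at method_start.
         */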
6910         code -= offset;
6911         for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
6912                 int idx = mono_breakpoint_info_index [i];
6913                 guint8 *ptr;
6914                 if (idx < 1)
6915                         continue;
6916                 ptr = mono_breakpoint_info [idx].address;
6917                 if (ptr >= code && ptr < code + size) {
6918                         guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
6919                         can_write = FALSE;
6920                         /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
6921                         buf [ptr - code] = saved_byte;
6922                 }
6923         }
6924         return can_write;
6925 }
6926
6927 gpointer
6928 mono_arch_get_vcall_slot (guint8 *code, mgreg_t *regs, int *displacement)
6929 {
6930         guint8 buf [10];
6931         guint32 reg;
6932         gint32 disp;
6933         guint8 rex = 0;
6934         MonoJitInfo *ji = NULL;
6935
6936 #ifdef ENABLE_LLVM
6937         /* code - 9 might be before the start of the method */
6938         /* FIXME: Avoid this expensive call somehow */
6939         ji = mono_jit_info_table_find (mono_domain_get (), (char*)code);
6940 #endif
6941
6942         mono_breakpoint_clean_code (ji ? ji->code_start : NULL, code, 9, buf, sizeof (buf));
6943         code = buf + 9;
6944
6945         *displacement = 0;
6946
6947         code -= 7;
6948
6949         /* 
6950          * A given byte sequence can match more than one case here, so we have to be
6951          * really careful about the ordering of the cases. Longer sequences
6952          * come first.
6953          * There are two types of calls:
6954          * - direct calls: 0xff address_byte 8/32-bit displacement
6955          * - indirect calls: nop nop nop <call>
6956          * The nops make sure we don't confuse the instruction preceding an indirect
6957          * call with a direct call.
6958          */
6959         if ((code [0] == 0x41) && (code [1] == 0xff) && (code [2] == 0x15)) {
6960                 /* call OFFSET(%rip) */
6961                 disp = *(guint32*)(code + 3);
6962                 return (gpointer*)(code + disp + 7);
6963         } else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2) && (amd64_sib_index (code [2]) == 4) && (amd64_sib_scale (code [2]) == 0)) {
6964                 /* call *[reg+disp32] using indexed addressing */
6965                 /* The LLVM JIT emits this, and we emit it too for %r12 */
6966                 if (IS_REX (code [-1])) {
6967                         rex = code [-1];
6968                         g_assert (amd64_rex_x (rex) == 0);
6969                 }                       
6970                 reg = amd64_sib_base (code [2]);
6971                 disp = *(gint32*)(code + 3);
6972         } else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) {
6973                 /* call *[reg+disp32] */
6974                 if (IS_REX (code [0]))
6975                         rex = code [0];
6976                 reg = amd64_modrm_rm (code [2]);
6977                 disp = *(gint32*)(code + 3);
6978                 /* R10 is clobbered by the IMT thunk code */
6979                 g_assert (reg != AMD64_R10);
6980         } else if (code [2] == 0xe8) {
6981                 /* call <ADDR> */
6982                 return NULL;
6983         } else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1) && (amd64_sib_index (code [5]) == 4) && (amd64_sib_scale (code [5]) == 0)) {
6984                 /* call *[r12+disp8] using indexed addressing */
6985                 if (IS_REX (code [2]))
6986                         rex = code [2];
6987                 reg = amd64_sib_base (code [5]);
6988                 disp = *(gint8*)(code + 6);
6989         } else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) {
6990                 /* call *%reg */
6991                 return NULL;
6992         } else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) {
6993                 /* call *[reg+disp8] */
6994                 if (IS_REX (code [3]))
6995                         rex = code [3];
6996                 reg = amd64_modrm_rm (code [5]);
6997                 disp = *(gint8*)(code + 6);
6998                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
6999         }
7000         else if ((code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
7001                 /* call *%reg */
7002                 if (IS_REX (code [4]))
7003                         rex = code [4];
7004                 reg = amd64_modrm_rm (code [6]);
7005                 disp = 0;
7006         }
7007         else
7008                 g_assert_not_reached ();
7009
7010         reg += amd64_rex_b (rex);
7011
7012         /* R11 is clobbered by the trampoline code */
7013         g_assert (reg != AMD64_R11);
7014
7015         *displacement = disp;
7016         return (gpointer)regs [reg];
7017 }
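
/*
 * Illustrative sketch, not built: combining the base pointer and displacement
 * returned by mono_arch_get_vcall_slot into the address of the memory slot the
 * call loaded its target from. The helper name is hypothetical.
 */
#if 0
static gpointer*
example_get_vcall_slot_addr (guint8 *code, mgreg_t *regs)
{
	int displacement;
	gpointer base = mono_arch_get_vcall_slot (code, regs, &displacement);

	if (!base)
		return NULL;
	return (gpointer*)((guint8*)base + displacement);
}
#endif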
7018
7019 int
7020 mono_arch_get_this_arg_reg (MonoMethodSignature *sig, MonoGenericSharingContext *gsctx, guint8 *code)
7021 {
7022         return AMD64_ARG_REG1;
7023 }
7024
7025 gpointer
7026 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, mgreg_t *regs, guint8 *code)
7027 {
7028         return (gpointer)regs [mono_arch_get_this_arg_reg (sig, gsctx, code)];
7029 }
7030
7031 #define MAX_ARCH_DELEGATE_PARAMS 10
7032
7033 static gpointer
7034 get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len)
7035 {
7036         guint8 *code, *start;
7037         int i;
7038
7039         if (has_target) {
7040                 start = code = mono_global_codeman_reserve (64);
7041
7042                 /* Replace the this argument with the target */
7043                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7044                 amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, target), 8);
7045                 amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
7046
7047                 g_assert ((code - start) < 64);
7048         } else {
7049                 start = code = mono_global_codeman_reserve (64);
7050
7051                 if (param_count == 0) {
7052                         amd64_jump_membase (code, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
7053                 } else {
7054                         /* We have to shift the arguments left */
7055                         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7056                         for (i = 0; i < param_count; ++i) {
7057 #ifdef HOST_WIN32
7058                                 if (i < 3)
7059                                         amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
7060                                 else
7061                                         amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
7062 #else
7063                                 amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
7064 #endif
7065                         }
7066
7067                         amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
7068                 }
7069                 g_assert ((code - start) < 64);
7070         }
7071
7072         mono_debug_add_delegate_trampoline (start, code - start);
7073
7074         if (code_len)
7075                 *code_len = code - start;
7076
7077         return start;
7078 }
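
/*
 * For reference, a C level sketch (not built, names hypothetical) of what the
 * has_target thunk emitted above does: it swaps the delegate for its target
 * object and tail-jumps to method_ptr. The real thunk is two movs and a jump,
 * so it never touches the stack and works for any signature.
 */
#if 0
typedef void (*ExampleInvoke) (gpointer this_obj, gpointer arg);

static void
example_has_target_thunk (MonoDelegate *del, gpointer arg)
{
	((ExampleInvoke)del->method_ptr) (del->target, arg);
}
#endif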
7079
7080 /*
7081  * mono_arch_get_delegate_invoke_impls:
7082  *
7083  *   Return a list of MonoTrampInfo structures for the delegate invoke impl
7084  * trampolines.
7085  */
7086 GSList*
7087 mono_arch_get_delegate_invoke_impls (void)
7088 {
7089         GSList *res = NULL;
7090         guint8 *code;
7091         guint32 code_len;
7092         int i;
7093
7094         code = get_delegate_invoke_impl (TRUE, 0, &code_len);
7095         res = g_slist_prepend (res, mono_tramp_info_create (g_strdup ("delegate_invoke_impl_has_target"), code, code_len, NULL, NULL));
7096
7097         for (i = 0; i < MAX_ARCH_DELEGATE_PARAMS; ++i) {
7098                 code = get_delegate_invoke_impl (FALSE, i, &code_len);
7099                 res = g_slist_prepend (res, mono_tramp_info_create (g_strdup_printf ("delegate_invoke_impl_target_%d", i), code, code_len, NULL, NULL));
7100         }
7101
7102         return res;
7103 }
7104
7105 gpointer
7106 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
7107 {
7108         guint8 *code, *start;
7109         int i;
7110
7111         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
7112                 return NULL;
7113
7114         /* FIXME: Support more cases */
7115         if (MONO_TYPE_ISSTRUCT (sig->ret))
7116                 return NULL;
7117
7118         if (has_target) {
7119                 static guint8* cached = NULL;
7120
7121                 if (cached)
7122                         return cached;
7123
7124                 if (mono_aot_only)
7125                         start = mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
7126                 else
7127                         start = get_delegate_invoke_impl (TRUE, 0, NULL);
7128
7129                 mono_memory_barrier ();
7130
7131                 cached = start;
7132         } else {
7133                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
7134                 for (i = 0; i < sig->param_count; ++i)
7135                         if (!mono_is_regsize_var (sig->params [i]))
7136                                 return NULL;
7137                 if (sig->param_count > 4)
7138                         return NULL;
7139
7140                 code = cache [sig->param_count];
7141                 if (code)
7142                         return code;
7143
7144                 if (mono_aot_only) {
7145                         char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
7146                         start = mono_aot_get_trampoline (name);
7147                         g_free (name);
7148                 } else {
7149                         start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL);
7150                 }
7151
7152                 mono_memory_barrier ();
7153
7154                 cache [sig->param_count] = start;
7155         }
7156
7157         return start;
7158 }
7159
7160 /*
7161  * Support for fast access to the thread-local lmf structure using the GS
7162  * segment register on NPTL + kernel 2.6.x.
7163  */
7164
7165 static gboolean tls_offset_inited = FALSE;
7166
7167 void
7168 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
7169 {
7170         if (!tls_offset_inited) {
7171 #ifdef HOST_WIN32
7172                 /* 
7173                  * We need to init this multiple times, since when we are first called, the key might not
7174                  * be initialized yet.
7175                  */
7176                 appdomain_tls_offset = mono_domain_get_tls_key ();
7177                 lmf_tls_offset = mono_get_jit_tls_key ();
7178                 lmf_addr_tls_offset = mono_get_jit_tls_key ();
7179
7180                 /* Only 64 tls entries can be accessed using inline code */
7181                 if (appdomain_tls_offset >= 64)
7182                         appdomain_tls_offset = -1;
7183                 if (lmf_tls_offset >= 64)
7184                         lmf_tls_offset = -1;
7185 #else
7186                 tls_offset_inited = TRUE;
7187 #ifdef MONO_XEN_OPT
7188                 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
7189 #endif
7190                 appdomain_tls_offset = mono_domain_get_tls_offset ();
7191                 lmf_tls_offset = mono_get_lmf_tls_offset ();
7192                 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
7193 #endif
7194         }               
7195 }
7196
7197 void
7198 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
7199 {
7200 }
7201
7202 #ifdef MONO_ARCH_HAVE_IMT
7203
7204 #define CMP_SIZE (6 + 1)
7205 #define CMP_REG_REG_SIZE (4 + 1)
7206 #define BR_SMALL_SIZE 2
7207 #define BR_LARGE_SIZE 6
7208 #define MOV_REG_IMM_SIZE 10
7209 #define MOV_REG_IMM_32BIT_SIZE 6
7210 #define JUMP_REG_SIZE (2 + 1)
7211
7212 static int
7213 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
7214 {
7215         int i, distance = 0;
7216         for (i = start; i < target; ++i)
7217                 distance += imt_entries [i]->chunk_size;
7218         return distance;
7219 }
7220
7221 /*
7222  * LOCKING: called with the domain lock held
7223  */
7224 gpointer
7225 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
7226         gpointer fail_tramp)
7227 {
7228         int i;
7229         int size = 0;
7230         guint8 *code, *start;
7231         gboolean vtable_is_32bit = ((gsize)(vtable) == (gsize)(int)(gsize)(vtable));
7232
7233         for (i = 0; i < count; ++i) {
7234                 MonoIMTCheckItem *item = imt_entries [i];
7235                 if (item->is_equals) {
7236                         if (item->check_target_idx) {
7237                                 if (!item->compare_done) {
7238                                         if (amd64_is_imm32 (item->key))
7239                                                 item->chunk_size += CMP_SIZE;
7240                                         else
7241                                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
7242                                 }
7243                                 if (item->has_target_code) {
7244                                         item->chunk_size += MOV_REG_IMM_SIZE;
7245                                 } else {
7246                                         if (vtable_is_32bit)
7247                                                 item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
7248                                         else
7249                                                 item->chunk_size += MOV_REG_IMM_SIZE;
7250                                 }
7251                                 item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
7252                         } else {
7253                                 if (fail_tramp) {
7254                                         item->chunk_size += MOV_REG_IMM_SIZE * 3 + CMP_REG_REG_SIZE +
7255                                                 BR_SMALL_SIZE + JUMP_REG_SIZE * 2;
7256                                 } else {
7257                                         if (vtable_is_32bit)
7258                                                 item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
7259                                         else
7260                                                 item->chunk_size += MOV_REG_IMM_SIZE;
7261                                         item->chunk_size += JUMP_REG_SIZE;
7262                                         /* with the assert-on-wrong-method code below enabled:
7263                                          * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
7264                                          */
7265                                 }
7266                         }
7267                 } else {
7268                         if (amd64_is_imm32 (item->key))
7269                                 item->chunk_size += CMP_SIZE;
7270                         else
7271                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
7272                         item->chunk_size += BR_LARGE_SIZE;
7273                         imt_entries [item->check_target_idx]->compare_done = TRUE;
7274                 }
7275                 size += item->chunk_size;
7276         }
7277         if (fail_tramp)
7278                 code = mono_method_alloc_generic_virtual_thunk (domain, size);
7279         else
7280                 code = mono_domain_code_reserve (domain, size);
7281         start = code;
7282         for (i = 0; i < count; ++i) {
7283                 MonoIMTCheckItem *item = imt_entries [i];
7284                 item->code_target = code;
7285                 if (item->is_equals) {
7286                         gboolean fail_case = !item->check_target_idx && fail_tramp;
7287
7288                         if (item->check_target_idx || fail_case) {
7289                                 if (!item->compare_done || fail_case) {
7290                                         if (amd64_is_imm32 (item->key))
7291                                                 amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
7292                                         else {
7293                                                 amd64_mov_reg_imm (code, AMD64_R10, item->key);
7294                                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
7295                                         }
7296                                 }
7297                                 item->jmp_code = code;
7298                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
7299                                 /* See the comment below about R10 */
7300                                 if (item->has_target_code) {
7301                                         amd64_mov_reg_imm (code, AMD64_R10, item->value.target_code);
7302                                         amd64_jump_reg (code, AMD64_R10);
7303                                 } else {
7304                                         amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
7305                                         amd64_jump_membase (code, AMD64_R10, 0);
7306                                 }
7307
7308                                 if (fail_case) {
7309                                         amd64_patch (item->jmp_code, code);
7310                                         amd64_mov_reg_imm (code, AMD64_R10, fail_tramp);
7311                                         amd64_jump_reg (code, AMD64_R10);
7312                                         item->jmp_code = NULL;
7313                                 }
7314                         } else {
7315                                 /* enable the commented code to assert on wrong method */
7316 #if 0
7317                                 if (amd64_is_imm32 (item->key))
7318                                         amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
7319                                 else {
7320                                         amd64_mov_reg_imm (code, AMD64_R10, item->key);
7321                                         amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
7322                                 }
7323                                 item->jmp_code = code;
7324                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
7325                                 /* See the comment below about R10 */
7326                                 amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
7327                                 amd64_jump_membase (code, AMD64_R10, 0);
7328                                 amd64_patch (item->jmp_code, code);
7329                                 amd64_breakpoint (code);
7330                                 item->jmp_code = NULL;
7331 #else
7332                                 /* We're using R10 here because R11
7333                                    needs to be preserved.  R10 needs
7334                                    to be preserved for calls which
7335                                    require a runtime generic context,
7336                                    but interface calls don't. */
7337                                 amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->value.vtable_slot]));
7338                                 amd64_jump_membase (code, AMD64_R10, 0);
7339 #endif
7340                         }
7341                 } else {
7342                         if (amd64_is_imm32 (item->key))
7343                                 amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key);
7344                         else {
7345                                 amd64_mov_reg_imm (code, AMD64_R10, item->key);
7346                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
7347                         }
7348                         item->jmp_code = code;
7349                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
7350                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
7351                         else
7352                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
7353                 }
7354                 g_assert (code - item->code_target <= item->chunk_size);
7355         }
7356         /* patch the branches to get to the target items */
7357         for (i = 0; i < count; ++i) {
7358                 MonoIMTCheckItem *item = imt_entries [i];
7359                 if (item->jmp_code) {
7360                         if (item->check_target_idx) {
7361                                 amd64_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
7362                         }
7363                 }
7364         }
7365
7366         if (!fail_tramp)
7367                 mono_stats.imt_thunks_size += code - start;
7368         g_assert (code - start <= size);
7369
7370         return start;
7371 }
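
/*
 * A C sketch (not built) of the control flow the thunk emitted above
 * implements: a walk over the sorted IMT entries where the range-check items
 * effectively perform a binary search, with the interface method key arriving
 * in MONO_ARCH_IMT_REG.
 */
#if 0
static gpointer
example_imt_thunk_logic (MonoIMTCheckItem **imt_entries, int count, gpointer key,
	MonoVTable *vtable, gpointer fail_tramp)
{
	int i = 0;

	while (i < count) {
		MonoIMTCheckItem *item = imt_entries [i];

		if (!item->is_equals) {
			/* Range check: jump over the lower subtree when the key is large */
			i = ((gsize)key >= (gsize)item->key) ? item->check_target_idx : i + 1;
		} else if (item->key == key || (!item->check_target_idx && !fail_tramp)) {
			/* Match, or the unconditional last entry: dispatch */
			return item->has_target_code ?
				(gpointer)item->value.target_code :
				vtable->vtable [item->value.vtable_slot];
		} else if (!item->check_target_idx) {
			/* The fail_case in the emitter above */
			return fail_tramp;
		} else {
			i = item->check_target_idx;
		}
	}
	return fail_tramp;
}
#endif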
7372
7373 MonoMethod*
7374 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
7375 {
7376         return (MonoMethod*)regs [MONO_ARCH_IMT_REG];
7377 }
7378 #endif
7379
7380 MonoVTable*
7381 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
7382 {
7383         return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
7384 }
7385
7386 GSList*
7387 mono_arch_get_cie_program (void)
7388 {
7389         GSList *l = NULL;
7390
7391         mono_add_unwind_op_def_cfa (l, (guint8*)NULL, (guint8*)NULL, AMD64_RSP, 8);
7392         mono_add_unwind_op_offset (l, (guint8*)NULL, (guint8*)NULL, AMD64_RIP, -8);
7393
7394         return l;
7395 }
7396
7397 MonoInst*
7398 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
7399 {
7400         MonoInst *ins = NULL;
7401         int opcode = 0;
7402
7403         if (cmethod->klass == mono_defaults.math_class) {
7404                 if (strcmp (cmethod->name, "Sin") == 0) {
7405                         opcode = OP_SIN;
7406                 } else if (strcmp (cmethod->name, "Cos") == 0) {
7407                         opcode = OP_COS;
7408                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
7409                         opcode = OP_SQRT;
7410                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
7411                         opcode = OP_ABS;
7412                 }
7413                 
7414                 if (opcode) {
7415                         MONO_INST_NEW (cfg, ins, opcode);
7416                         ins->type = STACK_R8;
7417                         ins->dreg = mono_alloc_freg (cfg);
7418                         ins->sreg1 = args [0]->dreg;
7419                         MONO_ADD_INS (cfg->cbb, ins);
7420                 }
7421
7422                 opcode = 0;
7423                 if (cfg->opt & MONO_OPT_CMOV) {
7424                         if (strcmp (cmethod->name, "Min") == 0) {
7425                                 if (fsig->params [0]->type == MONO_TYPE_I4)
7426                                         opcode = OP_IMIN;
7427                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
7428                                         opcode = OP_IMIN_UN;
7429                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
7430                                         opcode = OP_LMIN;
7431                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
7432                                         opcode = OP_LMIN_UN;
7433                         } else if (strcmp (cmethod->name, "Max") == 0) {
7434                                 if (fsig->params [0]->type == MONO_TYPE_I4)
7435                                         opcode = OP_IMAX;
7436                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
7437                                         opcode = OP_IMAX_UN;
7438                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
7439                                         opcode = OP_LMAX;
7440                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
7441                                         opcode = OP_LMAX_UN;
7442                         }
7443                 }
7444                 
7445                 if (opcode) {
7446                         MONO_INST_NEW (cfg, ins, opcode);
7447                         ins->type = (fsig->params [0]->type == MONO_TYPE_I4 || fsig->params [0]->type == MONO_TYPE_U4) ? STACK_I4 : STACK_I8;
7448                         ins->dreg = mono_alloc_ireg (cfg);
7449                         ins->sreg1 = args [0]->dreg;
7450                         ins->sreg2 = args [1]->dreg;
7451                         MONO_ADD_INS (cfg->cbb, ins);
7452                 }
7453
7454 #if 0
7455                 /* OP_FREM is not IEEE compatible */
7456                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
7457                         MONO_INST_NEW (cfg, ins, OP_FREM);
7458                         ins->inst_i0 = args [0];
7459                         ins->inst_i1 = args [1];
7460                 }
7461 #endif
7462         }
7463
7464         /* 
7465          * Can't implement CompareExchange methods this way since they have
7466          * three arguments.
7467          */
7468
7469         return ins;
7470 }
7471
7472 gboolean
7473 mono_arch_print_tree (MonoInst *tree, int arity)
7474 {
7475         return 0;
7476 }
7477
7478 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
7479 {
7480         MonoInst* ins;
7481         
7482         if (appdomain_tls_offset == -1)
7483                 return NULL;
7484         
7485         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
7486         ins->inst_offset = appdomain_tls_offset;
7487         return ins;
7488 }
7489
7490 #define _CTX_REG(ctx,fld,i) ((gpointer)((&ctx->fld)[i]))
7491
7492 gpointer
7493 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
7494 {
7495         switch (reg) {
7496         case AMD64_RCX: return (gpointer)ctx->rcx;
7497         case AMD64_RDX: return (gpointer)ctx->rdx;
7498         case AMD64_RBX: return (gpointer)ctx->rbx;
7499         case AMD64_RBP: return (gpointer)ctx->rbp;
7500         case AMD64_RSP: return (gpointer)ctx->rsp;
7501         default:
7502                 if (reg < 8)
7503                         return _CTX_REG (ctx, rax, reg);
7504                 else if (reg >= 12)
7505                         return _CTX_REG (ctx, r12, reg - 12);
7506                 else
7507                         g_assert_not_reached ();
7508         }
7509 }
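
/*
 * Usage sketch, not built: the register number follows the AMD64_* encoding,
 * so the value %rdi held when CTX was captured can be read back directly.
 * The default branch above assumes the MonoContext fields indexed through
 * _CTX_REG are laid out contiguously in encoding order.
 */
#if 0
static gpointer
example_read_rdi (MonoContext *ctx)
{
	return mono_arch_context_get_int_reg (ctx, AMD64_RDI);
}
#endif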
7510
7511 /*
7512  * mono_arch_emit_load_aotconst:
7513  *
7514  *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
7515  * TARGET from the mscorlib GOT in full-aot code.
7516  * On AMD64, the result is placed into R11.
7517  */
7518 guint8*
7519 mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, int tramp_type, gconstpointer target)
7520 {
7521         *ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
7522         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
7523
7524         return code;
7525 }
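
/*
 * Usage sketch, not built; the patch type and symbol name are examples only.
 * The mov above is emitted with a zero RIP-relative displacement, and the
 * prepended MonoJumpInfo entry tells the AOT compiler which GOT slot that
 * displacement must later be patched to reference.
 */
#if 0
static guint8*
example_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji)
{
	code = mono_arch_emit_load_aotconst (start, code, ji, MONO_PATCH_INFO_JIT_ICALL_ADDR, "mono_get_lmf_addr");
	/* The constant is now available through %r11 */
	amd64_call_reg (code, AMD64_R11);
	return code;
}
#endif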
7526
7527 /*
7528  * mono_arch_get_trampolines:
7529  *
7530  *   Return a list of MonoTrampInfo structures describing arch specific trampolines
7531  * for AOT.
7532  */
7533 GSList *
7534 mono_arch_get_trampolines (gboolean aot)
7535 {
7536         MonoTrampInfo *info;
7537         GSList *tramps = NULL;
7538
7539         mono_arch_get_throw_pending_exception (&info, aot);
7540
7541         tramps = g_slist_append (tramps, info);
7542
7543         return tramps;
7544 }
7545
7546 /* Soft Debug support */
7547 #ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
7548
7549 /*
7550  * mono_arch_set_breakpoint:
7551  *
7552  *   Set a breakpoint at the native code corresponding to JI at NATIVE_OFFSET.
7553  * The location should contain code emitted by OP_SEQ_POINT.
7554  */
7555 void
7556 mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
7557 {
7558         guint8 *code = ip;
7559         guint8 *orig_code = code;
7560
7561         /* 
7562          * In production we will use int3 (once the breakpoint size in the machine
7563          * description file is fixed up for it), but that could confuse gdb, so
7564          * during development we emit a SIGSEGV (a faulting load) instead.
7565          */
7566         g_assert (code [0] == 0x90);
7567         if (breakpoint_size == 8) {
7568                 amd64_mov_reg_mem (code, AMD64_R11, (guint64)bp_trigger_page, 4);
7569         } else {
7570                 amd64_mov_reg_imm_size (code, AMD64_R11, (guint64)bp_trigger_page, 8);
7571                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 4);
7572         }
7573
7574         g_assert (code - orig_code == breakpoint_size);
7575 }
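
/*
 * In C terms (a sketch, not built) the code patched in above amounts to a
 * read from the breakpoint trigger page; that page is kept unreadable, so the
 * load faults and mono_arch_is_breakpoint_event () below recognizes the
 * resulting signal.
 */
#if 0
static void
example_breakpoint_site (void)
{
	volatile int dummy = *(volatile int*)bp_trigger_page;
	(void)dummy;
}
#endif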
7576
7577 /*
7578  * mono_arch_clear_breakpoint:
7579  *
7580  *   Clear the breakpoint at IP.
7581  */
7582 void
7583 mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
7584 {
7585         guint8 *code = ip;
7586         int i;
7587
7588         for (i = 0; i < breakpoint_size; ++i)
7589                 x86_nop (code);
7590 }
7591
7592 gboolean
7593 mono_arch_is_breakpoint_event (void *info, void *sigctx)
7594 {
7595 #ifdef HOST_WIN32
7596         EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;
7597         return FALSE;
7598 #else
7599         siginfo_t* sinfo = (siginfo_t*) info;
7600         /* Sometimes the address is off by 4 */
7601         if (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
7602                 return TRUE;
7603         else
7604                 return FALSE;
7605 #endif
7606 }
7607
7608 /*
7609  * mono_arch_get_ip_for_breakpoint:
7610  *
7611  *   Convert the ip in CTX to the address where a breakpoint was placed.
7612  */
7613 guint8*
7614 mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
7615 {
7616         guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
7617
7618         /* ip points to the instruction causing the fault */
7619         ip -= (breakpoint_size - breakpoint_fault_size);
7620
7621         return ip;
7622 }
7623
7624 /*
7625  * mono_arch_skip_breakpoint:
7626  *
7627  *   Modify CTX so the ip is placed after the breakpoint instruction, so when
7628  * we resume, the instruction is not executed again.
7629  */
7630 void
7631 mono_arch_skip_breakpoint (MonoContext *ctx)
7632 {
7633         MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + breakpoint_fault_size);
7634 }
7635         
7636 /*
7637  * mono_arch_start_single_stepping:
7638  *
7639  *   Start single stepping.
7640  */
7641 void
7642 mono_arch_start_single_stepping (void)
7643 {
7644         mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
7645 }
7646         
7647 /*
7648  * mono_arch_stop_single_stepping:
7649  *
7650  *   Stop single stepping.
7651  */
7652 void
7653 mono_arch_stop_single_stepping (void)
7654 {
7655         mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
7656 }
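
/*
 * In C terms (a sketch, not built) every sequence point compiles to a read of
 * the single step trigger page; the two functions above just toggle read
 * access on that page, so the load is harmless normally and faults while
 * single stepping is active.
 */
#if 0
static void
example_sequence_point (void)
{
	volatile int dummy = *(volatile int*)ss_trigger_page;
	(void)dummy;
}
#endif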
7657
7658 /*
7659  * mono_arch_is_single_step_event:
7660  *
7661  *   Return whether the machine state in SIGCTX corresponds to a single
7662  * step event.
7663  */
7664 gboolean
7665 mono_arch_is_single_step_event (void *info, void *sigctx)
7666 {
7667 #ifdef HOST_WIN32
7668         EXCEPTION_RECORD* einfo = (EXCEPTION_RECORD*)info;
7669         return FALSE;
7670 #else
7671         siginfo_t* sinfo = (siginfo_t*) info;
7672         /* Sometimes the address is off by 4 */
7673         if (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
7674                 return TRUE;
7675         else
7676                 return FALSE;
7677 #endif
7678 }
7679
7680 /*
7681  * mono_arch_get_ip_for_single_step:
7682  *
7683  *   Convert the ip in CTX to the address stored in seq_points.
7684  */
7685 guint8*
7686 mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
7687 {
7688         guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
7689
7690         ip += single_step_fault_size;
7691
7692         return ip;
7693 }
7694
7695 /*
7696  * mono_arch_skip_single_step:
7697  *
7698  *   Modify CTX so the ip is placed after the single step trigger instruction,
7699  * so when we resume, the instruction is not executed again.
7700  */
7701 void
7702 mono_arch_skip_single_step (MonoContext *ctx)
7703 {
7704         MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + single_step_fault_size);
7705 }
7706
7707 /*
7708  * mono_arch_get_seq_point_info:
7709  *
7710  *   Return a pointer to a data structure which is used by the sequence
7711  * point implementation in AOTed code.
7712  */
7713 gpointer
7714 mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
7715 {
7716         NOT_IMPLEMENTED;
7717         return NULL;
7718 }
7719
7720 #endif