2008-06-13 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-amd64.c
/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *   Zoltan Varga (vargaz@gmail.com)
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-amd64.h"
#include "inssel.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
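
/*
 * Example (illustrative only, not part of the backend): ALIGN_TO rounds
 * VAL up to the next multiple of ALIGN, a power of two, e.g.
 *
 *   ALIGN_TO (13, 8) == 16
 *   ALIGN_TO (16, 8) == 16
 *
 * It is used below to size stack slots in 8 byte quanta.
 */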

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))

#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];

#ifdef PLATFORM_WIN32
/* On Win64, 32 bytes of shadow space are always reserved for the first
 * four arguments; together with the saved frame pointer and return
 * address this puts the first stack argument at offset 48. */
#define ARGS_OFFSET 48
#else
#define ARGS_OFFSET 16
#endif
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * UNORDERED        1  1  1
 */
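
/*
 * Reference sketch (not compiled): after a comiss/ucomisd, the flags in
 * the table above map onto the unsigned condition codes, e.g.
 *
 *   ja / seta   A > B      (ZF == 0 && CF == 0; never true if unordered)
 *   jb / setb   A < B      (CF == 1; also true if unordered)
 *   jp / setp   unordered  (PF == 1, used to filter out NaN results)
 */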

const char*
mono_arch_regname (int reg)
{
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static const char * xmmregs [] = {
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8",
        "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

const char*
mono_arch_fregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return xmmregs [reg];
        else
                return "unknown";
}

G_GNUC_UNUSED static void
break_count (void)
{
}

G_GNUC_UNUSED static gboolean
debug_count (void)
{
        static int count = 0;
        count ++;

        if (!getenv ("COUNT"))
                return TRUE;

        if (count == atoi (getenv ("COUNT"))) {
                break_count ();
        }

        if (count > atoi (getenv ("COUNT"))) {
                return FALSE;
        }

        return TRUE;
}

static gboolean
debug_omit_fp (void)
{
#if 0
        return debug_count ();
#else
        return TRUE;
#endif
}

static inline gboolean
amd64_is_near_call (guint8 *code)
{
        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        return code [0] == 0xe8;
}

static inline void
amd64_patch (unsigned char* code, gpointer target)
{
        guint8 rex = 0;

        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f)) {
                rex = code [0];
                code += 1;
        }

        if ((code [0] & 0xf8) == 0xb8) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) {
                /* mov 0(%rip), %dreg */
                *(guint32*)(code + 2) = (guint32)(guint64)target - 7;
        }
        else if ((code [0] == 0xff) && (code [1] == 0x15)) {
                /* call *<OFFSET>(%rip) */
                *(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
        }
        else if ((code [0] == 0xe8)) {
                /* call <DISP> */
                gint64 disp = (guint8*)target - (guint8*)code;
                g_assert (amd64_is_imm32 (disp));
                x86_patch (code, (unsigned char*)target);
        }
        else
                x86_patch (code, (unsigned char*)target);
}
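
/*
 * Usage sketch (hypothetical call site, for illustration only): given
 * CALL_SITE pointing at a 0xe8 near call emitted earlier, re-target it:
 *
 *   amd64_patch (call_site, new_target);
 *
 * For the 0xe8 case this asserts that the distance to the target fits
 * in a signed 32 bit displacement before handing off to x86_patch ().
 */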

void
mono_amd64_patch (unsigned char* code, gpointer target)
{
        amd64_patch (code, target);
}

typedef enum {
        ArgInIReg,
        ArgInFloatSSEReg,
        ArgInDoubleSSEReg,
        ArgOnStack,
        ArgValuetypeInReg,
        ArgValuetypeAddrInIReg,
        ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
        gint16 offset;
        gint8  reg;
        ArgStorage storage;

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];
        gint8 pair_regs [2];
} ArgInfo;

typedef struct {
        int nargs;
        guint32 stack_usage;
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
        ArgInfo ret;
        ArgInfo sig_cookie;
        ArgInfo args [1];
} CallInfo;
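
/*
 * Note (descriptive only): CallInfo is allocated with its args [] array
 * inline, as in get_call_info () below:
 *
 *   cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
 *
 * so args [] effectively holds one ArgInfo per 'this' pointer and
 * formal parameter.
 */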

#define DEBUG(a) if (cfg->verbose_level > 1) a

#define NEW_ICONST(cfg,dest,val) do {   \
                (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
                (dest)->opcode = OP_ICONST;     \
                (dest)->inst_c0 = (val);        \
                (dest)->type = STACK_I4;        \
        } while (0)

#ifdef PLATFORM_WIN32
#define PARAM_REGS 4

static AMD64_Reg_No param_regs [] = { AMD64_RCX, AMD64_RDX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#else
#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#endif

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}

#ifdef PLATFORM_WIN32
#define FLOAT_PARAM_REGS 4
#else
#define FLOAT_PARAM_REGS 8
#endif

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                /* Pick the SSE register class matching the precision */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2 = ARG_CLASS_NO_CLASS;
        MonoType *ptype;

        ptype = mono_type_get_underlying_type (type);
        switch (ptype->type) {
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_CHAR:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
#ifdef PLATFORM_WIN32
                class2 = ARG_CLASS_INTEGER;
#else
                class2 = ARG_CLASS_SSE;
#endif
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (ptype)) {
                        class2 = ARG_CLASS_INTEGER;
                        break;
                }
                /* fall through */
        case MONO_TYPE_VALUETYPE: {
                MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
                int i;

                for (i = 0; i < info->num_fields; ++i) {
                        class2 = class1;
                        class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                }
                break;
        }
        default:
                g_assert_not_reached ();
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}
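
/*
 * Worked example (illustrative): for a pinvoke signature passing
 *
 *   struct { gint64 l; double d; };
 *
 * the first quad classifies as ARG_CLASS_INTEGER and the second as
 * ARG_CLASS_SSE, so on non-Windows systems add_valuetype () below
 * assigns one general purpose register and one SSE register.
 */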

static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, quad, nquads, i;
        ArgumentClass args [2];
        MonoMarshalType *info;
        MonoClass *klass;

        klass = mono_class_from_mono_type (type);
        if (sig->pinvoke)
                size = mono_type_native_stack_size (&klass->byval_arg, NULL);
        else
                size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL);
#ifndef PLATFORM_WIN32
        if (!sig->pinvoke || (size == 0) || (size > 16)) {
#else
        if (!sig->pinvoke) {
#endif
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        /* FIXME: Handle structs smaller than 8 bytes */
        //if ((size % 8) != 0)
        //      NOT_IMPLEMENTED;

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        /*
         * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
         * The X87 and SSEUP stuff is left out since there are no such types in
         * the CLR.
         */
        info = mono_marshal_load_type_info (klass);
        g_assert (info);

#ifndef PLATFORM_WIN32
        if (info->native_size > 16) {
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (info->native_size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }
#else
        switch (info->native_size) {
        case 1: case 2: case 4: case 8:
                break;
        default:
                if (is_return) {
                        ainfo->storage = ArgOnStack;
                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                }
                else {
                        ainfo->storage = ArgValuetypeAddrInIReg;

                        if (*gr < PARAM_REGS) {
                                ainfo->pair_storage [0] = ArgInIReg;
                                ainfo->pair_regs [0] = param_regs [*gr];
                                (*gr) ++;
                        }
                        else {
                                ainfo->pair_storage [0] = ArgOnStack;
                                ainfo->offset = *stack_size;
                                *stack_size += 8;
                        }
                }

                return;
        }
#endif

        args [0] = ARG_CLASS_NO_CLASS;
        args [1] = ARG_CLASS_NO_CLASS;
        for (quad = 0; quad < nquads; ++quad) {
                int size;
                guint32 align;
                ArgumentClass class1;

                class1 = ARG_CLASS_NO_CLASS;
                for (i = 0; i < info->num_fields; ++i) {
                        size = mono_marshal_type_size (info->fields [i].field->type,
                                                       info->fields [i].mspec,
                                                       &align, TRUE, klass->unicode);
                        if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
                                /* Unaligned field */
                                NOT_IMPLEMENTED;
                        }

                        /* Skip fields in other quad */
                        if ((quad == 0) && (info->fields [i].offset >= 8))
                                continue;
                        if ((quad == 1) && (info->fields [i].offset < 8))
                                continue;

                        class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
                }
                g_assert (class1 != ARG_CLASS_NO_CLASS);
                args [quad] = class1;
        }

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = ArgOnStack;
                }
        }
}

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
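/*
 * Example (illustrative): on non-Windows systems, for a signature such as
 *
 *   int f (int a, double b);
 *
 * the resulting CallInfo has ret = ArgInIReg (RAX), a = ArgInIReg (RDI),
 * b = ArgInDoubleSSEReg (xmm0) and stack_usage == 0.
 */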
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
        guint32 i, gr, fr;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;

        if (mp)
                cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
        else
                cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        gr = 0;
        fr = 0;

        /* return value */
        {
                ret_type = mono_type_get_underlying_type (sig->ret);
                ret_type = mini_get_basic_type_from_generic (gsctx, ret_type);
                switch (ret_type->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_STRING:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_R4:
                        cinfo->ret.storage = ArgInFloatSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_R8:
                        cinfo->ret.storage = ArgInDoubleSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
                                cinfo->ret.storage = ArgInIReg;
                                cinfo->ret.reg = AMD64_RAX;
                                break;
                        }
                        /* fall through */
                case MONO_TYPE_VALUETYPE: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                        add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                        if (cinfo->ret.storage == ArgOnStack)
                                /* The caller passes the address where the value is stored */
                                add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                }
                case MONO_TYPE_TYPEDBYREF:
                        /* Same as a valuetype with size 24 */
                        add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                case MONO_TYPE_VOID:
                        break;
                default:
                        g_error ("Can't handle as return value 0x%x", sig->ret->type);
                }
        }

        /* this */
        if (sig->hasthis)
                add_general (&gr, &stack_size, cinfo->args + 0);

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = 0; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                if (sig->params [i]->byref) {
                        add_general (&gr, &stack_size, ainfo);
                        continue;
                }
                ptype = mono_type_get_underlying_type (sig->params [i]);
                ptype = mini_get_basic_type_from_generic (gsctx, ptype);
                switch (ptype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (ptype)) {
                                add_general (&gr, &stack_size, ainfo);
                                break;
                        }
                        /* fall through */
                case MONO_TYPE_VALUETYPE:
                        add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_TYPEDBYREF:
                        stack_size += sizeof (MonoTypedRef);
                        ainfo->storage = ArgOnStack;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

#ifdef PLATFORM_WIN32
        // On Win64, 32 bytes are always reserved on the stack when making a call
        stack_size += 0x20;
#endif

        if (stack_size & 0x8) {
                /* The AMD64 ABI requires each stack frame to be 16 byte aligned */
                cinfo->need_stack_align = TRUE;
                stack_size += 8;
        }
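
        /*
         * For example (illustrative): on non-Windows systems, seven
         * integer arguments leave one 8 byte slot on the stack, so the
         * check above grows stack_size from 8 to 16 to keep call sites
         * 16 byte aligned.
         */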

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the resulting argument information
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;
        CallInfo *cinfo = get_call_info (NULL, NULL, csig, FALSE);
        guint32 args_size = cinfo->stack_usage;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        g_free (cinfo);

        return args_size;
}

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
#ifndef _MSC_VER
        __asm__ __volatile__ ("cpuid"
                : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
                : "a" (id));
#else
        int info[4];
        __cpuid(info, id);
        *p_eax = info[0];
        *p_ebx = info[1];
        *p_ecx = info[2];
        *p_edx = info[3];
#endif
        return 1;
}

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
#ifndef _MSC_VER
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
        /* TODO: This is crashing on Win64 right now.
         * _control87 (_PC_53, MCW_PC);
         */
#endif
}

/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
        InitializeCriticalSection (&mini_arch_mutex);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
        DeleteCriticalSection (&mini_arch_mutex);
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
        int eax, ebx, ecx, edx;
        guint32 opts = 0;

        /* FIXME: AMD64 */

        *exclude_mask = 0;
        /* Feature Flags function, flags returned in EDX. */
        if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
                if (edx & (1 << 15)) {
                        opts |= MONO_OPT_CMOV;
                        if (edx & 1)
                                opts |= MONO_OPT_FCMOV;
                        else
                                *exclude_mask |= MONO_OPT_FCMOV;
                } else
                        *exclude_mask |= MONO_OPT_CMOV;
        }
        return opts;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                if (mono_is_regsize_var (ins->inst_vtype)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}

/**
 * mono_arch_compute_omit_fp:
 *
 *   Determine whether the frame pointer can be eliminated.
 */
static void
mono_arch_compute_omit_fp (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        int i, locals_size;
        CallInfo *cinfo;

        if (cfg->arch.omit_fp_computed)
                return;

        header = mono_method_get_header (cfg->method);

        sig = mono_method_signature (cfg->method);

        if (!cfg->arch.cinfo)
                cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
        cinfo = cfg->arch.cinfo;

        /*
         * FIXME: Remove some of the restrictions.
         */
        cfg->arch.omit_fp = TRUE;
        cfg->arch.omit_fp_computed = TRUE;

        if (cfg->disable_omit_fp)
                cfg->arch.omit_fp = FALSE;

        if (!debug_omit_fp ())
                cfg->arch.omit_fp = FALSE;
        /*
        if (cfg->method->save_lmf)
                cfg->arch.omit_fp = FALSE;
        */
        if (cfg->flags & MONO_CFG_HAS_ALLOCA)
                cfg->arch.omit_fp = FALSE;
        if (header->num_clauses)
                cfg->arch.omit_fp = FALSE;
        if (cfg->param_area)
                cfg->arch.omit_fp = FALSE;
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                cfg->arch.omit_fp = FALSE;
        if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
                (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
                cfg->arch.omit_fp = FALSE;
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];

                if (ainfo->storage == ArgOnStack) {
                        /*
                         * The stack offset can only be determined when the frame
                         * size is known.
                         */
                        cfg->arch.omit_fp = FALSE;
                }
        }

        locals_size = 0;
        for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                int ialign;

                locals_size += mono_type_size (ins->inst_vtype, &ialign);
        }

        if ((cfg->num_varinfo > 10000) || (locals_size >= (1 << 15))) {
                /* Avoid hitting the stack_alloc_size < (1 << 16) assertion in emit_epilog () */
                cfg->arch.omit_fp = FALSE;
        }
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
        GList *regs = NULL;

        mono_arch_compute_omit_fp (cfg);

        if (cfg->arch.omit_fp)
                regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

        /* We use the callee saved registers for global allocation */
        regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
        regs = g_list_prepend (regs, (gpointer)AMD64_R12);
        regs = g_list_prepend (regs, (gpointer)AMD64_R13);
        regs = g_list_prepend (regs, (gpointer)AMD64_R14);
        regs = g_list_prepend (regs, (gpointer)AMD64_R15);

        return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
        MonoInst *ins = cfg->varinfo [vmv->idx];

        if (cfg->method->save_lmf)
                /* The register is already saved */
                /* subtract 1 for the invisible store in the prolog */
                return (ins->opcode == OP_ARG) ? 0 : 1;
        else
                /* push+pop */
                return (ins->opcode == OP_ARG) ? 1 : 2;
}

void
mono_arch_allocate_vars (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *inst;
        int i, offset;
        guint32 locals_stack_size, locals_stack_align;
        gint32 *offsets;
        CallInfo *cinfo;

        header = mono_method_get_header (cfg->method);

        sig = mono_method_signature (cfg->method);

        cinfo = cfg->arch.cinfo;

        mono_arch_compute_omit_fp (cfg);

        /*
         * We use the ABI calling conventions for managed code as well.
         * Exception: valuetypes are never passed or returned in registers.
         */

        if (cfg->arch.omit_fp) {
                cfg->flags |= MONO_CFG_HAS_SPILLUP;
                cfg->frame_reg = AMD64_RSP;
                offset = 0;
        } else {
                /* Locals are allocated backwards from %fp */
                cfg->frame_reg = AMD64_RBP;
                offset = 0;
        }

        if (cfg->method->save_lmf) {
                /* Reserve stack space for saving LMF */
                /* mono_arch_find_jit_info () expects to find the LMF at a fixed offset */
                g_assert (offset == 0);
                if (cfg->arch.omit_fp) {
                        cfg->arch.lmf_offset = offset;
                        offset += sizeof (MonoLMF);
                }
                else {
                        offset += sizeof (MonoLMF);
                        cfg->arch.lmf_offset = -offset;
                }
        } else {
                if (cfg->arch.omit_fp)
                        cfg->arch.reg_save_area_offset = offset;
                /* Reserve space for callee saved registers */
                for (i = 0; i < AMD64_NREG; ++i)
                        if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
                                offset += sizeof (gpointer);
                        }
        }

        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
                                /* The register is volatile */
                                cfg->vret_addr->opcode = OP_REGOFFSET;
                                cfg->vret_addr->inst_basereg = cfg->frame_reg;
                                if (cfg->arch.omit_fp) {
                                        cfg->vret_addr->inst_offset = offset;
                                        offset += 8;
                                } else {
                                        offset += 8;
                                        cfg->vret_addr->inst_offset = -offset;
                                }
                                if (G_UNLIKELY (cfg->verbose_level > 1)) {
                                        printf ("vret_addr =");
                                        mono_print_ins (cfg->vret_addr);
                                }
                        }
                        else {
                                cfg->ret->opcode = OP_REGVAR;
                                cfg->ret->inst_c0 = cinfo->ret.reg;
                        }
                        break;
                case ArgValuetypeInReg:
                        /* Allocate a local to hold the result, the epilog will copy it to the correct place */
                        cfg->ret->opcode = OP_REGOFFSET;
                        cfg->ret->inst_basereg = cfg->frame_reg;
                        if (cfg->arch.omit_fp) {
                                cfg->ret->inst_offset = offset;
                                offset += 16;
                        } else {
                                offset += 16;
                                cfg->ret->inst_offset = - offset;
                        }
                        break;
                default:
                        g_assert_not_reached ();
                }
                cfg->ret->dreg = cfg->ret->inst_c0;
        }

        /* Allocate locals */
        offsets = mono_allocate_stack_slots_full (cfg, cfg->arch.omit_fp ? FALSE : TRUE, &locals_stack_size, &locals_stack_align);
        if (locals_stack_align) {
                offset += (locals_stack_align - 1);
                offset &= ~(locals_stack_align - 1);
        }
        for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                if (offsets [i] != -1) {
                        MonoInst *inst = cfg->varinfo [i];
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = cfg->frame_reg;
                        if (cfg->arch.omit_fp)
                                inst->inst_offset = (offset + offsets [i]);
                        else
                                inst->inst_offset = - (offset + offsets [i]);
                        //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
                }
        }
        offset += locals_stack_size;

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
                g_assert (!cfg->arch.omit_fp);
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                inst = cfg->args [i];
                if (inst->opcode != OP_REGVAR) {
                        ArgInfo *ainfo = &cinfo->args [i];
                        gboolean inreg = TRUE;
                        MonoType *arg_type;

                        if (sig->hasthis && (i == 0))
                                arg_type = &mono_defaults.object_class->byval_arg;
                        else
                                arg_type = sig->params [i - sig->hasthis];

                        /* FIXME: Allocate volatile arguments to registers */
                        if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
                                inreg = FALSE;

                        /*
                         * Under AMD64, all registers used to pass arguments to functions
                         * are volatile across calls.
                         * FIXME: Optimize this.
                         */
                        if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg))
                                inreg = FALSE;

                        inst->opcode = OP_REGOFFSET;

                        switch (ainfo->storage) {
                        case ArgInIReg:
                        case ArgInFloatSSEReg:
                        case ArgInDoubleSSEReg:
                                inst->opcode = OP_REGVAR;
                                inst->dreg = ainfo->reg;
                                break;
                        case ArgOnStack:
                                g_assert (!cfg->arch.omit_fp);
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = cfg->frame_reg;
                                inst->inst_offset = ainfo->offset + ARGS_OFFSET;
                                break;
                        case ArgValuetypeInReg:
                                break;
                        case ArgValuetypeAddrInIReg:
                                /* FIXME: Not sure what to do for this case yet on Winx64 */
                                break;
                        default:
                                NOT_IMPLEMENTED;
                        }

                        if (!inreg && (ainfo->storage != ArgOnStack)) {
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = cfg->frame_reg;
                                /* These arguments are saved to the stack in the prolog */
                                offset = ALIGN_TO (offset, sizeof (gpointer));
                                if (cfg->arch.omit_fp) {
                                        inst->inst_offset = offset;
                                        offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
                                } else {
                                        offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
                                        inst->inst_offset = - offset;
                                }
                        }
                }
        }

        cfg->stack_offset = offset;
}

void
mono_arch_create_vars (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        CallInfo *cinfo;

        sig = mono_method_signature (cfg->method);

        if (!cfg->arch.cinfo)
                cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
        cinfo = cfg->arch.cinfo;

        if (cinfo->ret.storage == ArgValuetypeInReg)
                cfg->ret_var_is_local = TRUE;

        if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
                cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
                if (G_UNLIKELY (cfg->verbose_level > 1)) {
                        printf ("vret_addr = ");
                        mono_print_ins (cfg->vret_addr);
                }
        }
}

static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage storage, int reg, MonoInst *tree)
{
        switch (storage) {
        case ArgInIReg:
                arg->opcode = OP_OUTARG_REG;
                arg->inst_left = tree;
                arg->inst_call = call;
                arg->backend.reg3 = reg;
                break;
        case ArgInFloatSSEReg:
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R4;
                arg->inst_left = tree;
                arg->inst_call = call;
                arg->backend.reg3 = reg;
                break;
        case ArgInDoubleSSEReg:
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R8;
                arg->inst_left = tree;
                arg->inst_call = call;
                arg->backend.reg3 = reg;
                break;
        default:
                g_assert_not_reached ();
        }
}

/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info.
 */

static int
arg_storage_to_ldind (ArgStorage storage)
{
        switch (storage) {
        case ArgInIReg:
                return CEE_LDIND_I;
        case ArgInDoubleSSEReg:
                return CEE_LDIND_R8;
        case ArgInFloatSSEReg:
                return CEE_LDIND_R4;
        default:
                g_assert_not_reached ();
        }

        return -1;
}

static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
        MonoInst *arg;
        MonoMethodSignature *tmp_sig;
        MonoInst *sig_arg;

        /* FIXME: Add support for signature tokens to AOT */
        cfg->disable_aot = TRUE;

        g_assert (cinfo->sig_cookie.storage == ArgOnStack);

        /*
         * mono_ArgIterator_Setup assumes the signature cookie is
         * passed first and all the arguments which were before it are
         * passed on the stack after the signature. So compensate by
         * passing a different signature.
         */
        tmp_sig = mono_metadata_signature_dup (call->signature);
        tmp_sig->param_count -= call->signature->sentinelpos;
        tmp_sig->sentinelpos = 0;
        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

        MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
        sig_arg->inst_p0 = tmp_sig;

        MONO_INST_NEW (cfg, arg, OP_OUTARG);
        arg->inst_left = sig_arg;
        arg->type = STACK_PTR;
        MONO_INST_LIST_ADD (&arg->node, &call->out_args);
}

/*
 * Take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register,
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual)
{
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
        int i, n, stack_size;
        CallInfo *cinfo;
        ArgInfo *ainfo;

        stack_size = 0;

        sig = call->signature;
        n = sig->param_count + sig->hasthis;

        cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);

        for (i = 0; i < n; ++i) {
                ainfo = cinfo->args + i;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* Emit the signature cookie just before the implicit arguments */
                        emit_sig_cookie (cfg, call, cinfo);
                }

                if (is_virtual && i == 0) {
                        /* the argument will be attached to the call instruction */
                        in = call->args [i];
                } else {
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        in = call->args [i];
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
                        arg->type = in->type;
                        if (!cinfo->stack_usage)
                                /* Keep the assignments to the arg registers in order if possible */
                                MONO_INST_LIST_ADD_TAIL (&arg->node, &call->out_args);
                        else
                                MONO_INST_LIST_ADD (&arg->node, &call->out_args);

                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT (sig->params [i - sig->hasthis]))) {
                                guint32 align;
                                guint32 size;

                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
                                        size = sizeof (MonoTypedRef);
                                        align = sizeof (gpointer);
                                }
                                else if (sig->pinvoke)
                                        size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
                                else {
                                        /*
                                         * Other backends use mini_type_stack_size (), but that
                                         * aligns the size to 8, which is larger than the size of
                                         * the source, leading to reads of invalid memory if the
                                         * source is at the end of address space.
                                         */
                                        size = mono_class_value_size (in->klass, &align);
                                }
                                if (ainfo->storage == ArgValuetypeInReg) {
                                        if (ainfo->pair_storage [1] == ArgNone) {
                                                MonoInst *load;

                                                /* Simpler case */

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = in;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);
                                        }
                                        else {
                                                /* Trees can't be shared so make a copy */
                                                MonoInst *vtaddr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                                                MonoInst *load, *load2, *offset_ins;

                                                /* Reg1 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->ssa_op = MONO_SSA_LOAD;
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

                                                NEW_ICONST (cfg, offset_ins, 0);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = load2;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);

                                                /* Reg2 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->ssa_op = MONO_SSA_LOAD;
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

                                                NEW_ICONST (cfg, offset_ins, 8);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [1]));
                                                load->inst_left = load2;

                                                MONO_INST_NEW (cfg, arg, OP_OUTARG);
                                                arg->cil_code = in->cil_code;
                                                arg->type = in->type;
                                                MONO_INST_LIST_ADD (&arg->node, &call->out_args);

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1], load);

                                                /* Prepend a copy inst */
                                                MONO_INST_NEW (cfg, arg, CEE_STIND_I);
                                                arg->cil_code = in->cil_code;
                                                arg->ssa_op = MONO_SSA_STORE;
                                                arg->inst_left = vtaddr;
                                                arg->inst_right = in;
                                                arg->type = in->type;

                                                MONO_INST_LIST_ADD (&arg->node, &call->out_args);
                                        }
                                }
1447                                 else if (ainfo->storage == ArgValuetypeAddrInIReg) {
1448
1449                                         /* Add a temp variable to the method */
1450                                         MonoInst *load;
1451                                         MonoInst *vtaddr = mono_compile_create_var (cfg, &in->klass->byval_arg, OP_LOCAL);
1452                                         
1453                                         MONO_INST_NEW (cfg, load, OP_LDADDR);
1454                                         load->ssa_op = MONO_SSA_LOAD;
1455                                         load->inst_left = vtaddr;
1456                                         
1457                                         if (ainfo->pair_storage [0] == ArgInIReg) {
1458                                                 /* Inserted after the copy. Load the address of the temp into the argument register. */
1459                                                 arg->opcode = OP_OUTARG_REG;
1460                                                 arg->inst_left = load;
1461                                                 arg->inst_call = call;
1462                                                 arg->backend.reg3 = ainfo->pair_regs [0];
1463                                         } 
1464                                         else {
1465                                                 /* Inserted after the copy. Pass the address of the temp on the stack. */
1466                                                 arg->opcode = OP_OUTARG_VT;
1467                                                 arg->inst_left = load;
1468                                                 arg->type = STACK_PTR;
1469                                                 arg->klass = mono_defaults.int_class;
1470                                                 arg->backend.is_pinvoke = sig->pinvoke;
1471                                                 arg->inst_imm = size;
1472                                         }
1473
1474                                         /* Copy the argument to the temp variable. */
1475                                         MONO_INST_NEW (cfg, load, OP_MEMCPY);
1476                                         load->backend.memcpy_args = mono_mempool_alloc0 (cfg->mempool, sizeof (MonoMemcpyArgs));
1477                                         load->backend.memcpy_args->size = mono_class_value_size (in->klass, &align);
1478                                         load->backend.memcpy_args->align = align;
1479                                         load->inst_left = (cfg)->varinfo [vtaddr->inst_c0];
1480                                         load->inst_right = in->inst_i0;
1481                                         MONO_INST_LIST_ADD (&load->node, &call->out_args);
1482                                 }
1483                                 else {
1484                                         arg->opcode = OP_OUTARG_VT;
1485                                         arg->klass = in->klass;
1486                                         arg->backend.is_pinvoke = sig->pinvoke;
1487                                         arg->inst_imm = size;
1488                                 }
1489                         }
1490                         else {
1491                                 switch (ainfo->storage) {
1492                                 case ArgInIReg:
1493                                         add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
1494                                         break;
1495                                 case ArgInFloatSSEReg:
1496                                 case ArgInDoubleSSEReg:
1497                                         add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
1498                                         break;
1499                                 case ArgOnStack:
1500                                         arg->opcode = OP_OUTARG;
1501                                         if (!sig->params [i - sig->hasthis]->byref) {
1502                                                 if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4)
1503                                                         arg->opcode = OP_OUTARG_R4;
1504                                                 else
1505                                                         if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8)
1506                                                                 arg->opcode = OP_OUTARG_R8;
1507                                         }
1508                                         break;
1509                                 default:
1510                                         g_assert_not_reached ();
1511                                 }
1512                         }
1513                 }
1514         }
1515
1516         /* Handle the case where there are no implicit arguments */
1517         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos)) {
1518                 emit_sig_cookie (cfg, call, cinfo);
1519         }
1520
1521         if (cinfo->need_stack_align) {
1522                 MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
1523                 arg->inst_c0 = 8;
1524                 MONO_INST_LIST_ADD (&arg->node, &call->out_args);
1525         }
1526
1527 #ifdef PLATFORM_WIN32
1528         /* Always reserve 32 bytes of stack space on Win64 */
1529         MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
1530         arg->inst_c0 = 32;
1531         MONO_INST_LIST_ADD_TAIL (&arg->node, &call->out_args);
1532 #endif
1533
1534         if (cfg->method->save_lmf) {
1535                 MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF);
1536                 MONO_INST_LIST_ADD_TAIL (&arg->node, &call->out_args);
1537         }
1538
1539         call->stack_usage = cinfo->stack_usage;
1540         cfg->param_area = MAX (cfg->param_area, call->stack_usage);
1541         cfg->flags |= MONO_CFG_HAS_CALLS;
1542
1543         return call;
1544 }
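
/*
 * Illustrative sketch (not from the source; struct and classification chosen
 * for illustration): for a 16 byte struct such as
 *
 *     struct { double d; gint64 i; };
 *
 * the SysV classification would typically give pair_storage [0] ==
 * ArgInDoubleSSEReg and pair_storage [1] == ArgInIReg, so the code above
 * copies the value to a temp and then loads offset 0 and offset 8 of that
 * copy into the two assigned argument registers.
 */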
1545
1546 #define EMIT_COND_BRANCH(ins,cond,sign) \
1547 if (ins->flags & MONO_INST_BRLABEL) { \
1548         if (ins->inst_i0->inst_c0) { \
1549                 x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
1550         } else { \
1551                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
1552                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1553                     x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
1554                         x86_branch8 (code, cond, 0, sign); \
1555                 else \
1556                         x86_branch32 (code, cond, 0, sign); \
1557         } \
1558 } else { \
1559         if (ins->inst_true_bb->native_offset) { \
1560                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
1561         } else { \
1562                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
1563                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1564                     x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
1565                         x86_branch8 (code, cond, 0, sign); \
1566                 else \
1567                         x86_branch32 (code, cond, 0, sign); \
1568         } \
1569 }
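
/*
 * Sketch of the code emitted by EMIT_COND_BRANCH: for an already-emitted
 * target the displacement is known; otherwise a patch entry is recorded and
 * the branch size is chosen from the estimated offset:
 *
 *     jcc  rel8      ; MONO_OPT_BRANCH and the offset estimate fits in 8 bits
 *     jcc  rel32     ; otherwise
 */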
1570
1571 /* emit an exception if the condition fails */
1572 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1573         do {                                                        \
1574                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1575                 if (tins == NULL) {                                                                             \
1576                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1577                                         MONO_PATCH_INFO_EXC, exc_name);  \
1578                         x86_branch32 (code, cond, 0, signed);               \
1579                 } else {        \
1580                         EMIT_COND_BRANCH (tins, cond, signed);  \
1581                 }                       \
1582         } while (0); 
1583
1584 #define EMIT_FPCOMPARE(code) do { \
1585         amd64_fcompp (code); \
1586         amd64_fnstsw (code); \
1587 } while (0); 
1588
1589 #define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
1590         amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
1591         amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
1592         amd64_ ##op (code); \
1593         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
1594         amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
1595 } while (0);
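
/*
 * Sketch of an expansion, assuming the SysV red zone below %rsp may be used
 * as scratch space and with dreg = %xmm0, sreg1 = %xmm1 (registers invented):
 * EMIT_SSE2_FPFUNC (code, fsin, dreg, sreg1) emits roughly
 *
 *     movsd  %xmm1, -8(%rsp)    ; spill the SSE operand
 *     fldl   -8(%rsp)           ; reload it onto the x87 stack
 *     fsin                      ; the operation only exists on x87
 *     fstpl  -8(%rsp)           ; store and pop the x87 result
 *     movsd  -8(%rsp), %xmm0    ; move the result back into SSE
 */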
1596
1597 static guint8*
1598 emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1599 {
1600         /* 
1601          * FIXME: Add support for thunks
1602          */
1603         {
1604                 gboolean near_call = FALSE;
1605
1606                 /*
1607                  * Indirect calls are expensive, so try to make a near call if possible.
1608                  * The caller's memory is allocated by the code manager, so a target that
1609                  * is also in code manager memory is guaranteed to be within a 32 bit offset.
1610                  */
1611
1612                 if (patch_type != MONO_PATCH_INFO_ABS) {
1613                         /* The target is in memory allocated using the code manager */
1614                         near_call = TRUE;
1615
1616                         if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
1617                                 if (((MonoMethod*)data)->klass->image->assembly->aot_module)
1618                                         /* The callee might be an AOT method */
1619                                         near_call = FALSE;
1620                                 if (((MonoMethod*)data)->dynamic)
1621                                         /* The target is in malloc-ed memory */
1622                                         near_call = FALSE;
1623                         }
1624
1625                         if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
1626                                 /* 
1627                                  * The call might go directly to a native function without
1628                                  * the wrapper.
1629                                  */
1630                                 MonoJitICallInfo *mi = mono_find_jit_icall_by_name (data);
1631                                 if (mi) {
1632                                         gconstpointer target = mono_icall_get_wrapper (mi);
1633                                         if ((((guint64)target) >> 32) != 0)
1634                                                 near_call = FALSE;
1635                                 }
1636                         }
1637                 }
1638                 else {
1639                         if (mono_find_class_init_trampoline_by_addr (data))
1640                                 near_call = TRUE;
1641                         else {
1642                                 MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
1643                                 if (info) {
1644                                         if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && 
1645                                                 strstr (cfg->method->name, info->name)) {
1646                                                 /* A call to the wrapped function */
1647                                                 if ((((guint64)data) >> 32) == 0)
1648                                                         near_call = TRUE;
1649                                         }
1650                                         else if (info->func == info->wrapper) {
1651                                                 /* No wrapper */
1652                                                 if ((((guint64)info->func) >> 32) == 0)
1653                                                         near_call = TRUE;
1654                                         }
1655                                         else {
1656                                                 /* See the comment in mono_codegen () */
1657                                                 if ((info->name [0] != 'v') || (strstr (info->name, "ves_array_new_va_") == NULL && strstr (info->name, "ves_array_element_address_") == NULL))
1658                                                         near_call = TRUE;
1659                                         }
1660                                 }
1661                                 else if ((((guint64)data) >> 32) == 0)
1662                                         near_call = TRUE;
1663                         }
1664                 }
1665
1666                 if (cfg->method->dynamic)
1667                         /* These methods are allocated using malloc */
1668                         near_call = FALSE;
1669
1670                 if (cfg->compile_aot)
1671                         near_call = TRUE;
1672
1673 #ifdef MONO_ARCH_NOMAP32BIT
1674                 near_call = FALSE;
1675 #endif
1676
1677                 if (near_call) {
1678                         /* 
1679                          * Align the call displacement to an address divisible by 4 so it does
1680                          * not span cache lines. This is required for code patching to work on SMP
1681                          * systems.
1682                          */
1683                         if (((guint32)(code + 1 - cfg->native_code) % 4) != 0)
1684                                 amd64_padding (code, 4 - ((guint32)(code + 1 - cfg->native_code) % 4));
1685                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1686                         amd64_call_code (code, 0);
1687                 }
1688                 else {
1689                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1690                         amd64_set_reg_template (code, GP_SCRATCH_REG);
1691                         amd64_call_reg (code, GP_SCRATCH_REG);
1692                 }
1693         }
1694
1695         return code;
1696 }
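
/*
 * For reference, the two call shapes selected above look roughly like this
 * (AT&T syntax, target address invented):
 *
 *     call   rel32                      ; near call, patchable in place
 *
 *     movabs $0x7fff12345678, %r11      ; far call through the scratch reg
 *     call   *%r11
 *
 * The near form only works when the target is within a signed 32 bit
 * displacement, hence the fallbacks above for malloc-ed and high memory.
 */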
1697
1698 static inline guint8*
1699 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data, gboolean win64_adjust_stack)
1700 {
1701 #ifdef PLATFORM_WIN32
1702         if (win64_adjust_stack)
1703                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 32);
1704 #endif
1705         code = emit_call_body (cfg, code, patch_type, data);
1706 #ifdef PLATFORM_WIN32
1707         if (win64_adjust_stack)
1708                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 32);
1709 #endif  
1710         
1711         return code;
1712 }
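
/*
 * On Win64, with win64_adjust_stack set, the emitted sequence brackets the
 * call with the 32 byte home area required by the calling convention (sketch):
 *
 *     sub    $32, %rsp
 *     call   <target>
 *     add    $32, %rsp
 */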
1713
1714 static inline int
1715 store_membase_imm_to_store_membase_reg (int opcode)
1716 {
1717         switch (opcode) {
1718         case OP_STORE_MEMBASE_IMM:
1719                 return OP_STORE_MEMBASE_REG;
1720         case OP_STOREI4_MEMBASE_IMM:
1721                 return OP_STOREI4_MEMBASE_REG;
1722         case OP_STOREI8_MEMBASE_IMM:
1723                 return OP_STOREI8_MEMBASE_REG;
1724         }
1725
1726         return -1;
1727 }
1728
1729 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB) || ((opcode) == OP_ISBB_IMM)))
1730
1731 /*
1732  * mono_arch_peephole_pass_1:
1733  *
1734  *   Perform peephole opts which should/can be performed before local regalloc
1735  */
1736 void
1737 mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1738 {
1739         MonoInst *ins, *n;
1740
1741         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
1742                 MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list);
1743
1744                 switch (ins->opcode) {
1745                 case OP_ADD_IMM:
1746                 case OP_IADD_IMM:
1747                 case OP_LADD_IMM:
1748                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS) && (ins->inst_imm > 0)) {
1749                                 /* 
1750                                  * X86_LEA is like ADD, but doesn't have the
1751                                  * sreg1==dreg restriction. inst_imm > 0 is needed since LEA sign-extends 
1752                                  * its operand to 64 bit.
1753                                  */
1754                                 ins->opcode = OP_X86_LEA_MEMBASE;
1755                                 ins->inst_basereg = ins->sreg1;
1756                         }
1757                         break;
1758                 case OP_LXOR:
1759                 case OP_IXOR:
1760                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
1761                                 MonoInst *ins2;
1762
1763                                 /* 
1764                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
1765                                  * the latter has length 2-3 instead of 6 (reverse constant
1766                                  * propagation). These instruction sequences are very common
1767                                  * in the initlocals bblock.
1768                                  */
1769                                 for (ins2 = mono_inst_list_next (&ins->node, &bb->ins_list); ins2;
1770                                                 ins2 = mono_inst_list_next (&ins2->node, &bb->ins_list)) {
1771                                         if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
1772                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
1773                                                 ins2->sreg1 = ins->dreg;
1774                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
1775                                                 /* Continue */
1776                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
1777                                                 NULLIFY_INS (ins2);
1778                                                 /* Continue */
1779                                         } else {
1780                                                 break;
1781                                         }
1782                                 }
1783                         }
1784                         break;
1785                 case OP_COMPARE_IMM:
1786                 case OP_LCOMPARE_IMM:
1787                         /* OP_COMPARE_IMM (reg, 0) 
1788                          * --> 
1789                          * OP_AMD64_TEST_NULL (reg) 
1790                          */
1791                         if (!ins->inst_imm)
1792                                 ins->opcode = OP_AMD64_TEST_NULL;
1793                         break;
1794                 case OP_ICOMPARE_IMM:
1795                         if (!ins->inst_imm)
1796                                 ins->opcode = OP_X86_TEST_NULL;
1797                         break;
1798                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
1799                         /* 
1800                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1801                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1802                          * -->
1803                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1804                          * OP_COMPARE_IMM reg, imm
1805                          *
1806                          * Note: if imm == 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
1807                          */
1808                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1809                             ins->inst_basereg == last_ins->inst_destbasereg &&
1810                             ins->inst_offset == last_ins->inst_offset) {
1811                                         ins->opcode = OP_ICOMPARE_IMM;
1812                                         ins->sreg1 = last_ins->sreg1;
1813
1814                                         /* check if we can remove cmp reg,0 with test null */
1815                                         if (!ins->inst_imm)
1816                                                 ins->opcode = OP_X86_TEST_NULL;
1817                                 }
1818
1819                         break;
1820                 }
1821
1822                 mono_peephole_ins (bb, ins);
1823         }
1824 }
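
/*
 * Example of the reverse constant propagation above (IR sketch, vreg names
 * invented): the initlocals sequence
 *
 *     OP_IXOR                 R10 <- R10 R10        ; R10 = 0
 *     OP_STOREI8_MEMBASE_IMM  [R_fp + 0x10] <- 0    ; long immediate encoding
 *
 * becomes
 *
 *     OP_IXOR                 R10 <- R10 R10
 *     OP_STOREI8_MEMBASE_REG  [R_fp + 0x10] <- R10  ; 2-3 byte encoding
 */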
1825
1826 void
1827 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
1828 {
1829         MonoInst *ins, *n;
1830
1831         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
1832                 switch (ins->opcode) {
1833                 case OP_ICONST:
1834                 case OP_I8CONST: {
1835                         MonoInst *next;
1836
1837                         /* reg = 0 -> XOR (reg, reg) */
1838                         /* XOR sets the condition flags, so we can't always do it */
1839                         next = mono_inst_list_next (&ins->node, &bb->ins_list);
1840                         if (ins->inst_c0 == 0 && (!next ||
1841                                         INST_IGNORES_CFLAGS (next->opcode))) {
1842                                 ins->opcode = OP_LXOR;
1843                                 ins->sreg1 = ins->dreg;
1844                                 ins->sreg2 = ins->dreg;
1845                                 /* Fall through */
1846                         } else {
1847                                 break;
1848                         }
1849                 }
1850                 case OP_LXOR:
1851                         /*
1852                          * Use IXOR to avoid a rex prefix if possible. The cpu will zero extend the
1853                          * 0 result into 64 bits.
1854                          */
1855                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
1856                                 ins->opcode = OP_IXOR;
1857                         }
1858                         /* Fall through */
1859                 case OP_IXOR:
1860                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
1861                                 MonoInst *ins2;
1862
1863                                 /* 
1864                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
1865                                  * the latter has length 2-3 instead of 6 (reverse constant
1866                                  * propagation). These instruction sequences are very common
1867                                  * in the initlocals bblock.
1868                                  */
1869                                 for (ins2 = mono_inst_list_next (&ins->node, &bb->ins_list); ins2;
1870                                                 ins2 = mono_inst_list_next (&ins2->node, &bb->ins_list)) {
1871                                         if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
1872                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
1873                                                 ins2->sreg1 = ins->dreg;
1874                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
1875                                                 /* Continue */
1876                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
1877                                                 NULLIFY_INS (ins2);
1878                                                 /* Continue */
1879                                         } else {
1880                                                 break;
1881                                         }
1882                                 }
1883                         }
1884                         break;
1885                 case OP_IADD_IMM:
1886                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1887                                 ins->opcode = OP_X86_INC_REG;
1888                         break;
1889                 case OP_ISUB_IMM:
1890                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1891                                 ins->opcode = OP_X86_DEC_REG;
1892                         break;
1893                 }
1894
1895                 mono_peephole_ins (bb, ins);
1896         }
1897 }
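
/*
 * Example of the constant rewrite in pass 2 (IR sketch, vreg names invented):
 * when the following instruction ignores the condition flags,
 *
 *     OP_I8CONST  R10 <- 0            ; 10 byte movabs
 *
 * becomes
 *
 *     OP_IXOR     R10 <- R10 R10      ; 2 byte xor
 *
 * and the 32 bit form is used to drop the REX prefix, since a 32 bit write
 * zero extends into the full 64 bit register anyway.
 */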
1898
1899 #define NEW_INS(cfg,ins,dest,op) do {   \
1900                 MONO_INST_NEW ((cfg), (dest), (op)); \
1901                 (dest)->cil_code = (ins)->cil_code; \
1902                 MONO_INST_LIST_ADD_TAIL (&(dest)->node, &(ins)->node); \
1903         } while (0)
1904
1905 /*
1906  * mono_arch_lowering_pass:
1907  *
1908  *  Converts complex opcodes into simpler ones so that each IR instruction
1909  * corresponds to one machine instruction.
1910  */
1911 void
1912 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1913 {
1914         MonoInst *ins, *n, *temp;
1915
1916         if (bb->max_vreg > cfg->rs->next_vreg)
1917                 cfg->rs->next_vreg = bb->max_vreg;
1918
1919         /*
1920          * FIXME: Need to add more instructions, but the current machine 
1921          * description can't model some parts of the composite instructions like
1922          * cdq.
1923          */
1924         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
1925                 switch (ins->opcode) {
1926                 case OP_DIV_IMM:
1927                 case OP_REM_IMM:
1928                 case OP_IDIV_IMM:
1929                 case OP_IREM_IMM:
1930                 case OP_IDIV_UN_IMM:
1931                 case OP_IREM_UN_IMM:
1932                         mono_decompose_op_imm (cfg, ins);
1933                         break;
1934                 case OP_COMPARE_IMM:
1935                 case OP_LCOMPARE_IMM:
1936                         if (!amd64_is_imm32 (ins->inst_imm)) {
1937                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
1938                                 temp->inst_c0 = ins->inst_imm;
1939                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1940                                 ins->opcode = OP_COMPARE;
1941                                 ins->sreg2 = temp->dreg;
1942                         }
1943                         break;
1944                 case OP_LOAD_MEMBASE:
1945                 case OP_LOADI8_MEMBASE:
1946                         if (!amd64_is_imm32 (ins->inst_offset)) {
1947                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
1948                                 temp->inst_c0 = ins->inst_offset;
1949                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1950                                 ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
1951                                 ins->inst_indexreg = temp->dreg;
1952                         }
1953                         break;
1954                 case OP_STORE_MEMBASE_IMM:
1955                 case OP_STOREI8_MEMBASE_IMM:
1956                         if (!amd64_is_imm32 (ins->inst_imm)) {
1957                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
1958                                 temp->inst_c0 = ins->inst_imm;
1959                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1960                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
1961                                 ins->sreg1 = temp->dreg;
1962                         }
1963                         break;
1964                 default:
1965                         break;
1966                 }
1967         }
1968
1969         bb->max_vreg = cfg->rs->next_vreg;
1970 }
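
/*
 * Lowering example (IR sketch, vreg names invented): a compare against a
 * constant that does not fit in a signed 32 bit immediate,
 *
 *     OP_LCOMPARE_IMM  R10, 0x123456789a
 *
 * is rewritten into
 *
 *     OP_I8CONST       R11 <- 0x123456789a
 *     OP_COMPARE       R10 R11
 *
 * so the cmp instruction emitted later only ever sees register operands or
 * imm32 immediates.
 */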
1971
1972 static const int 
1973 branch_cc_table [] = {
1974         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1975         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1976         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1977 };
1978
1979 /* Maps CMP_... constants to X86_CC_... constants */
1980 static const int
1981 cc_table [] = {
1982         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
1983         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
1984 };
1985
1986 static const int
1987 cc_signed_table [] = {
1988         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1989         FALSE, FALSE, FALSE, FALSE
1990 };
1991
1992 /*#include "cprop.c"*/
1993
1994 static unsigned char*
1995 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
1996 {
1997         amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
1998
1999         if (size == 1)
2000                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
2001         else if (size == 2)
2002                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
2003         return code;
2004 }
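
/*
 * Sketch of the emitted sequence for size == 1, is_signed == TRUE
 * (registers invented):
 *
 *     cvttsd2si  %xmm0, %rax    ; truncating double -> integer conversion
 *     movsbq     %al, %rax      ; narrow to a signed byte, then re-widen
 */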
2005
2006 static unsigned char*
2007 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2008 {
2009         int sreg = tree->sreg1;
2010         int need_touch = FALSE;
2011
2012 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
2013         if (!(tree->flags & MONO_INST_INIT))
2014                 need_touch = TRUE;
2015 #endif
2016
2017         if (need_touch) {
2018                 guint8* br[5];
2019
2020                 /*
2021                  * Under Windows:
2022                  * If requested stack size is larger than one page,
2023                  * perform stack-touch operation
2024                  */
2025                 /*
2026                  * Generate stack probe code.
2027                  * Under Windows, it is necessary to allocate one page at a time,
2028                  * "touching" stack after each successful sub-allocation. This is
2029                  * because of the way stack growth is implemented - there is a
2030                  * guard page before the lowest stack page that is currently committed.
2031                  * Stack normally grows sequentially, so the OS traps access to the
2032                  * guard page and commits more pages when needed.
2033                  */
2034                 amd64_test_reg_imm (code, sreg, ~0xFFF);
2035                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2036
2037                 br[2] = code; /* loop */
2038                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
2039                 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
2040                 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2041                 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2042                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2043                 amd64_patch (br[3], br[2]);
2044                 amd64_test_reg_reg (code, sreg, sreg);
2045                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2046                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2047
2048                 br[1] = code; x86_jump8 (code, 0);
2049
2050                 amd64_patch (br[0], code);
2051                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2052                 amd64_patch (br[1], code);
2053                 amd64_patch (br[4], code);
2054         }
2055         else
2056                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
2057
2058         if (tree->flags & MONO_INST_INIT) {
2059                 int offset = 0;
2060                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
2061                         amd64_push_reg (code, AMD64_RAX);
2062                         offset += 8;
2063                 }
2064                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
2065                         amd64_push_reg (code, AMD64_RCX);
2066                         offset += 8;
2067                 }
2068                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
2069                         amd64_push_reg (code, AMD64_RDI);
2070                         offset += 8;
2071                 }
2072                 
2073                 amd64_shift_reg_imm (code, X86_SHR, sreg, 3);
2074                 if (sreg != AMD64_RCX)
2075                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
2076                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2077                                 
2078                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
2079                 amd64_cld (code);
2080                 amd64_prefix (code, X86_REP_PREFIX);
2081                 amd64_stosl (code);
2082                 
2083                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
2084                         amd64_pop_reg (code, AMD64_RDI);
2085                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
2086                         amd64_pop_reg (code, AMD64_RCX);
2087                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
2088                         amd64_pop_reg (code, AMD64_RAX);
2089         }
2090         return code;
2091 }
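
/*
 * The probe loop above corresponds roughly to (sketch, AT&T syntax, sreg
 * holds the requested size):
 *
 *         test   $~0xfff, sreg
 *         jz     2f              ; request fits in one page, no probing
 *     1:  sub    $0x1000, %rsp   ; extend the stack one page at a time
 *         test   %rsp, (%rsp)    ; touch the new page to commit it
 *         sub    $0x1000, sreg
 *         cmp    $0x1000, sreg
 *         jae    1b
 *         test   sreg, sreg
 *         jz     3f              ; no remainder left
 *         sub    sreg, %rsp      ; allocate the remainder
 *         jmp    3f
 *     2:  sub    sreg, %rsp      ; small request
 *     3:
 */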
2092
2093 static guint8*
2094 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2095 {
2096         CallInfo *cinfo;
2097         guint32 quad;
2098
2099         /* Move return value to the target register */
2100         /* FIXME: do this in the local reg allocator */
2101         switch (ins->opcode) {
2102         case OP_CALL:
2103         case OP_CALL_REG:
2104         case OP_CALL_MEMBASE:
2105         case OP_LCALL:
2106         case OP_LCALL_REG:
2107         case OP_LCALL_MEMBASE:
2108                 g_assert (ins->dreg == AMD64_RAX);
2109                 break;
2110         case OP_FCALL:
2111         case OP_FCALL_REG:
2112         case OP_FCALL_MEMBASE:
2113                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
2114                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
2115                 }
2116                 else {
2117                         if (ins->dreg != AMD64_XMM0)
2118                                 amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
2119                 }
2120                 break;
2121         case OP_VCALL:
2122         case OP_VCALL_REG:
2123         case OP_VCALL_MEMBASE:
2124                 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
2125                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2126                         /* Pop the destination address from the stack */
2127                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2128                         amd64_pop_reg (code, AMD64_RCX);
2129                         
2130                         for (quad = 0; quad < 2; quad ++) {
2131                                 switch (cinfo->ret.pair_storage [quad]) {
2132                                 case ArgInIReg:
2133                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
2134                                         break;
2135                                 case ArgInFloatSSEReg:
2136                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2137                                         break;
2138                                 case ArgInDoubleSSEReg:
2139                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2140                                         break;
2141                                 case ArgNone:
2142                                         break;
2143                                 default:
2144                                         NOT_IMPLEMENTED;
2145                                 }
2146                         }
2147                 }
2148                 break;
2149         }
2150
2151         return code;
2152 }
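
/*
 * Sketch of the ArgValuetypeInReg stores above for a struct returned in an
 * integer and an SSE register (assignment invented for illustration):
 *
 *     mov    %rax, 0(%rcx)      ; first quad, ArgInIReg
 *     movsd  %xmm0, 8(%rcx)     ; second quad, ArgInDoubleSSEReg
 *
 * with %rcx holding the destination address popped off the stack.
 */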
2153
2154 /*
2155  * emit_tls_get:
2156  * @code: buffer to store code to
2157  * @dreg: hard register where to place the result
2158  * @tls_offset: offset of the item within the thread local storage area
2159  *
2160  * emit_tls_get emits into @code the native code that loads the item in
2161  * thread local storage identified by @tls_offset into the @dreg register.
2162  *
2163  * Returns: a pointer to the end of the stored code
2164  */
2165 static guint8*
2166 emit_tls_get (guint8* code, int dreg, int tls_offset)
2167 {
2168         if (optimize_for_xen) {
2169                 x86_prefix (code, X86_FS_PREFIX);
2170                 amd64_mov_reg_mem (code, dreg, 0, 8);
2171                 amd64_mov_reg_membase (code, dreg, dreg, tls_offset, 8);
2172         } else {
2173                 x86_prefix (code, X86_FS_PREFIX);
2174                 amd64_mov_reg_mem (code, dreg, tls_offset, 8);
2175         }
2176         return code;
2177 }
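
/*
 * The two shapes emitted above (sketch, AT&T syntax):
 *
 *     movq   %fs:tls_offset, dreg       ; normal case, one access
 *
 *     movq   %fs:0, dreg                ; Xen case: fetch the TLS base,
 *     movq   tls_offset(dreg), dreg     ;   then index off it
 *
 * The assumption behind optimize_for_xen is that segment-prefixed accesses
 * at a non-zero offset are the expensive case to virtualize.
 */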
2178
2179 /*
2180  * emit_load_volatile_arguments:
2181  *
2182  *  Load volatile arguments from the stack to the original input registers.
2183  * Required before a tail call.
2184  */
2185 static guint8*
2186 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2187 {
2188         MonoMethod *method = cfg->method;
2189         MonoMethodSignature *sig;
2190         MonoInst *ins;
2191         CallInfo *cinfo;
2192         guint32 i, quad;
2193
2194         /* FIXME: Generate intermediate code instead */
2195
2196         sig = mono_method_signature (method);
2197
2198         cinfo = cfg->arch.cinfo;
2199         
2200         /* This is the opposite of the code in emit_prolog */
2201         if (sig->ret->type != MONO_TYPE_VOID) {
2202                 if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
2203                         amd64_mov_reg_membase (code, cinfo->ret.reg, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, 8);
2204         }
2205
2206         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2207                 ArgInfo *ainfo = cinfo->args + i;
2208                 MonoType *arg_type;
2209                 ins = cfg->args [i];
2210
2211                 if (sig->hasthis && (i == 0))
2212                         arg_type = &mono_defaults.object_class->byval_arg;
2213                 else
2214                         arg_type = sig->params [i - sig->hasthis];
2215
2216                 if (ins->opcode != OP_REGVAR) {
2217                         switch (ainfo->storage) {
2218                         case ArgInIReg: {
2219                                 guint32 size = 8;
2220
2221                                 /* FIXME: I1 etc */
2222                                 amd64_mov_reg_membase (code, ainfo->reg, ins->inst_basereg, ins->inst_offset, size);
2223                                 break;
2224                         }
2225                         case ArgInFloatSSEReg:
2226                                 amd64_movss_reg_membase (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
2227                                 break;
2228                         case ArgInDoubleSSEReg:
2229                                 amd64_movsd_reg_membase (code, ainfo->reg, ins->inst_basereg, ins->inst_offset);
2230                                 break;
2231                         case ArgValuetypeInReg:
2232                                 for (quad = 0; quad < 2; quad ++) {
2233                                         switch (ainfo->pair_storage [quad]) {
2234                                         case ArgInIReg:
2235                                                 amd64_mov_reg_membase (code, ainfo->pair_regs [quad], ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer));
2236                                                 break;
2237                                         case ArgInFloatSSEReg:
2238                                         case ArgInDoubleSSEReg:
2239                                                 g_assert_not_reached ();
2240                                                 break;
2241                                         case ArgNone:
2242                                                 break;
2243                                         default:
2244                                                 g_assert_not_reached ();
2245                                         }
2246                                 }
2247                                 break;
2248                         default:
2249                                 break;
2250                         }
2251                 }
2252                 else {
2253                         g_assert (ainfo->storage == ArgInIReg);
2254
2255                         amd64_mov_reg_reg (code, ainfo->reg, ins->dreg, 8);
2256                 }
2257         }
2258
2259         return code;
2260 }
2261
2262 #define REAL_PRINT_REG(text,reg) \
2263 mono_assert (reg >= 0); \
2264 amd64_push_reg (code, AMD64_RAX); \
2265 amd64_push_reg (code, AMD64_RDX); \
2266 amd64_push_reg (code, AMD64_RCX); \
2267 amd64_push_reg (code, reg); \
2268 amd64_push_imm (code, reg); \
2269 amd64_push_imm (code, text " %d %p\n"); \
2270 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
2271 amd64_call_reg (code, AMD64_RAX); \
2272 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); \
2273 amd64_pop_reg (code, AMD64_RCX); \
2274 amd64_pop_reg (code, AMD64_RDX); \
2275 amd64_pop_reg (code, AMD64_RAX);
2276
2277 /* benchmark and set based on cpu */
2278 #define LOOP_ALIGNMENT 8
2279 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2280
2281 void
2282 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2283 {
2284         MonoInst *ins;
2285         MonoCallInst *call;
2286         guint offset;
2287         guint8 *code = cfg->native_code + cfg->code_len;
2288         guint last_offset = 0;
2289         int max_len, cpos;
2290
2291         if (cfg->opt & MONO_OPT_LOOP) {
2292                 int pad, align = LOOP_ALIGNMENT;
2293                 /* set alignment depending on cpu */
2294                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2295                         pad = align - pad;
2296                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2297                         amd64_padding (code, pad);
2298                         cfg->code_len += pad;
2299                         bb->native_offset = cfg->code_len;
2300                 }
2301         }
2302
2303         if (cfg->verbose_level > 2)
2304                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2305
2306         cpos = bb->max_offset;
2307
2308         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2309                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2310                 g_assert (!cfg->compile_aot);
2311                 cpos += 6;
2312
2313                 cov->data [bb->dfn].cil_code = bb->cil_code;
2314                 amd64_mov_reg_imm (code, AMD64_R11, (guint64)&cov->data [bb->dfn].count);
2315                 /* this is not thread safe, but good enough */
2316                 amd64_inc_membase (code, AMD64_R11, 0);
2317         }
2318
2319         offset = code - cfg->native_code;
2320
2321         mono_debug_open_block (cfg, bb, offset);
2322
2323         MONO_BB_FOR_EACH_INS (bb, ins) {
2324                 offset = code - cfg->native_code;
2325
2326                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2327
2328                 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2329                         cfg->code_size *= 2;
2330                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2331                         code = cfg->native_code + offset;
2332                         mono_jit_stats.code_reallocs++;
2333                 }
2334
2335                 if (cfg->debug_info)
2336                         mono_debug_record_line_number (cfg, ins, offset);
2337
2338                 switch (ins->opcode) {
2339                 case OP_BIGMUL:
2340                         amd64_mul_reg (code, ins->sreg2, TRUE);
2341                         break;
2342                 case OP_BIGMUL_UN:
2343                         amd64_mul_reg (code, ins->sreg2, FALSE);
2344                         break;
2345                 case OP_X86_SETEQ_MEMBASE:
2346                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2347                         break;
2348                 case OP_STOREI1_MEMBASE_IMM:
2349                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2350                         break;
2351                 case OP_STOREI2_MEMBASE_IMM:
2352                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2353                         break;
2354                 case OP_STOREI4_MEMBASE_IMM:
2355                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2356                         break;
2357                 case OP_STOREI1_MEMBASE_REG:
2358                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2359                         break;
2360                 case OP_STOREI2_MEMBASE_REG:
2361                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2362                         break;
2363                 case OP_STORE_MEMBASE_REG:
2364                 case OP_STOREI8_MEMBASE_REG:
2365                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2366                         break;
2367                 case OP_STOREI4_MEMBASE_REG:
2368                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2369                         break;
2370                 case OP_STORE_MEMBASE_IMM:
2371                 case OP_STOREI8_MEMBASE_IMM:
2372                         g_assert (amd64_is_imm32 (ins->inst_imm));
2373                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2374                         break;
2375                 case OP_LOAD_MEM:
2376                 case OP_LOADI8_MEM:
2377                         // FIXME: Decompose this earlier
2378                         if (amd64_is_imm32 (ins->inst_imm))
2379                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, sizeof (gpointer));
2380                         else {
2381                                 amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
2382                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8);
2383                         }
2384                         break;
2385                 case OP_LOADI4_MEM:
2386                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
2387                         amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0);
2388                         break;
2389                 case OP_LOADU4_MEM:
2390                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2391                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2392                         break;
2393                 case OP_LOADU1_MEM:
2394                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
2395                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE);
2396                         break;
2397                 case OP_LOADU2_MEM:
2398                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
2399                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE);
2400                         break;
2401                 case OP_LOAD_MEMBASE:
2402                 case OP_LOADI8_MEMBASE:
2403                         g_assert (amd64_is_imm32 (ins->inst_offset));
2404                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2405                         break;
2406                 case OP_LOADI4_MEMBASE:
2407                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2408                         break;
2409                 case OP_LOADU4_MEMBASE:
2410                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2411                         break;
2412                 case OP_LOADU1_MEMBASE:
2413                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2414                         break;
2415                 case OP_LOADI1_MEMBASE:
2416                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2417                         break;
2418                 case OP_LOADU2_MEMBASE:
2419                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2420                         break;
2421                 case OP_LOADI2_MEMBASE:
2422                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2423                         break;
2424                 case OP_AMD64_LOADI8_MEMINDEX:
2425                         amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
2426                         break;
2427                 case OP_LCONV_TO_I1:
2428                 case OP_ICONV_TO_I1:
2429                 case OP_SEXT_I1:
2430                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2431                         break;
2432                 case OP_LCONV_TO_I2:
2433                 case OP_ICONV_TO_I2:
2434                 case OP_SEXT_I2:
2435                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2436                         break;
2437                 case OP_LCONV_TO_U1:
2438                 case OP_ICONV_TO_U1:
2439                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2440                         break;
2441                 case OP_LCONV_TO_U2:
2442                 case OP_ICONV_TO_U2:
2443                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2444                         break;
2445                 case OP_ZEXT_I4:
2446                         /* Clean out the upper word */
2447                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2448                         break;
2449                 case OP_SEXT_I4:
2450                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2451                         break;
2452                 case OP_COMPARE:
2453                 case OP_LCOMPARE:
2454                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2455                         break;
2456                 case OP_COMPARE_IMM:
2457                 case OP_LCOMPARE_IMM:
2458                         g_assert (amd64_is_imm32 (ins->inst_imm));
2459                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2460                         break;
2461                 case OP_X86_COMPARE_REG_MEMBASE:
2462                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2463                         break;
2464                 case OP_X86_TEST_NULL:
2465                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
2466                         break;
2467                 case OP_AMD64_TEST_NULL:
2468                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
2469                         break;
2470
2471                 case OP_X86_ADD_REG_MEMBASE:
2472                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2473                         break;
2474                 case OP_X86_SUB_REG_MEMBASE:
2475                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2476                         break;
2477                 case OP_X86_AND_REG_MEMBASE:
2478                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2479                         break;
2480                 case OP_X86_OR_REG_MEMBASE:
2481                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2482                         break;
2483                 case OP_X86_XOR_REG_MEMBASE:
2484                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2485                         break;
2486
2487                 case OP_X86_ADD_MEMBASE_IMM:
2488                         /* FIXME: Make a 64 version too */
2489                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2490                         break;
2491                 case OP_X86_SUB_MEMBASE_IMM:
2492                         g_assert (amd64_is_imm32 (ins->inst_imm));
2493                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2494                         break;
2495                 case OP_X86_AND_MEMBASE_IMM:
2496                         g_assert (amd64_is_imm32 (ins->inst_imm));
2497                         amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2498                         break;
2499                 case OP_X86_OR_MEMBASE_IMM:
2500                         g_assert (amd64_is_imm32 (ins->inst_imm));
2501                         amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2502                         break;
2503                 case OP_X86_XOR_MEMBASE_IMM:
2504                         g_assert (amd64_is_imm32 (ins->inst_imm));
2505                         amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2506                         break;
2507                 case OP_X86_ADD_MEMBASE_REG:
2508                         amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2509                         break;
2510                 case OP_X86_SUB_MEMBASE_REG:
2511                         amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2512                         break;
2513                 case OP_X86_AND_MEMBASE_REG:
2514                         amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2515                         break;
2516                 case OP_X86_OR_MEMBASE_REG:
2517                         amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2518                         break;
2519                 case OP_X86_XOR_MEMBASE_REG:
2520                         amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2521                         break;
2522                 case OP_X86_INC_MEMBASE:
2523                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
2524                         break;
2525                 case OP_X86_INC_REG:
2526                         amd64_inc_reg_size (code, ins->dreg, 4);
2527                         break;
2528                 case OP_X86_DEC_MEMBASE:
2529                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
2530                         break;
2531                 case OP_X86_DEC_REG:
2532                         amd64_dec_reg_size (code, ins->dreg, 4);
2533                         break;
2534                 case OP_X86_MUL_REG_MEMBASE:
2535                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2536                         break;
2537                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
2538                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2539                         break;
2540                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
2541                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2542                         break;
2543                 case OP_AMD64_COMPARE_MEMBASE_REG:
2544                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
2545                         break;
2546                 case OP_AMD64_COMPARE_MEMBASE_IMM:
2547                         g_assert (amd64_is_imm32 (ins->inst_imm));
2548                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
2549                         break;
2550                 case OP_X86_COMPARE_MEMBASE8_IMM:
2551                         amd64_alu_membase8_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2552                         break;
2553                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
2554                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2555                         break;
2556                 case OP_AMD64_COMPARE_REG_MEMBASE:
2557                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
2558                         break;
2559
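                /*
                 * The OP_AMD64_* ALU variants below are the full 8 byte forms of the
                 * 4 byte OP_X86_* membase ops above.
                 */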
2560                 case OP_AMD64_ADD_REG_MEMBASE:
2561                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
2562                         break;
2563                 case OP_AMD64_SUB_REG_MEMBASE:
2564                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
2565                         break;
2566                 case OP_AMD64_AND_REG_MEMBASE:
2567                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
2568                         break;
2569                 case OP_AMD64_OR_REG_MEMBASE:
2570                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
2571                         break;
2572                 case OP_AMD64_XOR_REG_MEMBASE:
2573                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
2574                         break;
2575
2576                 case OP_AMD64_ADD_MEMBASE_REG:
2577                         amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
2578                         break;
2579                 case OP_AMD64_SUB_MEMBASE_REG:
2580                         amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
2581                         break;
2582                 case OP_AMD64_AND_MEMBASE_REG:
2583                         amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
2584                         break;
2585                 case OP_AMD64_OR_MEMBASE_REG:
2586                         amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
2587                         break;
2588                 case OP_AMD64_XOR_MEMBASE_REG:
2589                         amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
2590                         break;
2591
2592                 case OP_AMD64_ADD_MEMBASE_IMM:
2593                         g_assert (amd64_is_imm32 (ins->inst_imm));
2594                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
2595                         break;
2596                 case OP_AMD64_SUB_MEMBASE_IMM:
2597                         g_assert (amd64_is_imm32 (ins->inst_imm));
2598                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
2599                         break;
2600                 case OP_AMD64_AND_MEMBASE_IMM:
2601                         g_assert (amd64_is_imm32 (ins->inst_imm));
2602                         amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
2603                         break;
2604                 case OP_AMD64_OR_MEMBASE_IMM:
2605                         g_assert (amd64_is_imm32 (ins->inst_imm));
2606                         amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
2607                         break;
2608                 case OP_AMD64_XOR_MEMBASE_IMM:
2609                         g_assert (amd64_is_imm32 (ins->inst_imm));
2610                         amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
2611                         break;
2612
2613                 case OP_BREAK:
2614                         amd64_breakpoint (code);
2615                         break;
2616                 case OP_NOP:
2617                 case OP_DUMMY_USE:
2618                 case OP_DUMMY_STORE:
2619                 case OP_NOT_REACHED:
2620                 case OP_NOT_NULL:
2621                         break;
2622                 case OP_ADDCC:
2623                 case OP_LADD:
2624                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2625                         break;
2626                 case OP_ADC:
2627                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2628                         break;
2629                 case OP_ADD_IMM:
2630                 case OP_LADD_IMM:
2631                         g_assert (amd64_is_imm32 (ins->inst_imm));
2632                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2633                         break;
2634                 case OP_ADC_IMM:
2635                         g_assert (amd64_is_imm32 (ins->inst_imm));
2636                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2637                         break;
2638                 case OP_SUBCC:
2639                 case OP_LSUB:
2640                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2641                         break;
2642                 case OP_SBB:
2643                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2644                         break;
2645                 case OP_SUB_IMM:
2646                 case OP_LSUB_IMM:
2647                         g_assert (amd64_is_imm32 (ins->inst_imm));
2648                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2649                         break;
2650                 case OP_SBB_IMM:
2651                         g_assert (amd64_is_imm32 (ins->inst_imm));
2652                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2653                         break;
2654                 case OP_LAND:
2655                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2656                         break;
2657                 case OP_AND_IMM:
2658                 case OP_LAND_IMM:
2659                         g_assert (amd64_is_imm32 (ins->inst_imm));
2660                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2661                         break;
2662                 case OP_LMUL:
2663                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2664                         break;
2665                 case OP_MUL_IMM:
2666                 case OP_LMUL_IMM:
2667                 case OP_IMUL_IMM: {
2668                         guint32 size = (ins->opcode == OP_IMUL_IMM) ? 4 : 8;
2669                         
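                        /*
                         * Strength-reduce multiplication by small constants: LEA with a
                         * scaled index computes reg + reg*{1,2,4,8} in one instruction,
                         * which is cheaper than IMUL. E.g. 100 = 5*4*5 becomes
                         *   lea r1, [r2 + r2*4]   ; *5
                         *   shl r1, 2             ; *4
                         *   lea r1, [r1 + r1*4]   ; *5
                         * Any other constant falls back to IMUL with an immediate.
                         */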
2670                         switch (ins->inst_imm) {
2671                         case 2:
2672                                 /* MOV r1, r2 */
2673                                 /* ADD r1, r1 */
2674                                 if (ins->dreg != ins->sreg1)
2675                                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, size);
2676                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2677                                 break;
2678                         case 3:
2679                                 /* LEA r1, [r2 + r2*2] */
2680                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2681                                 break;
2682                         case 5:
2683                                 /* LEA r1, [r2 + r2*4] */
2684                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2685                                 break;
2686                         case 6:
2687                                 /* LEA r1, [r2 + r2*2] */
2688                                 /* ADD r1, r1          */
2689                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2690                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2691                                 break;
2692                         case 9:
2693                                 /* LEA r1, [r2 + r2*8] */
2694                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2695                                 break;
2696                         case 10:
2697                                 /* LEA r1, [r2 + r2*4] */
2698                                 /* ADD r1, r1          */
2699                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2700                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2701                                 break;
2702                         case 12:
2703                                 /* LEA r1, [r2 + r2*2] */
2704                                 /* SHL r1, 2           */
2705                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2706                                 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2707                                 break;
2708                         case 25:
2709                                 /* LEA r1, [r2 + r2*4] */
2710                                 /* LEA r1, [r1 + r1*4] */
2711                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2712                                 amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2713                                 break;
2714                         case 100:
2715                                 /* LEA r1, [r2 + r2*4] */
2716                                 /* SHL r1, 2           */
2717                                 /* LEA r1, [r1 + r1*4] */
2718                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2719                                 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2720                                 amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2721                                 break;
2722                         default:
2723                                 amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, size);
2724                                 break;
2725                         }
2726                         break;
2727                 }
2728                 case OP_LDIV:
2729                 case OP_LREM:
2730                         /* Regalloc magic makes the div/rem cases the same */
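                        /*
                         * IDIV implicitly divides RDX:RAX, and the sign extension below
                         * clobbers RDX. If the divisor itself lives in RDX, park it in
                         * the red zone below RSP and divide by the memory operand
                         * instead. The unsigned variants zero RDX rather than
                         * sign-extending.
                         */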
2731                         if (ins->sreg2 == AMD64_RDX) {
2732                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
2733                                 amd64_cdq (code);
2734                                 amd64_div_membase (code, AMD64_RSP, -8, TRUE);
2735                         } else {
2736                                 amd64_cdq (code);
2737                                 amd64_div_reg (code, ins->sreg2, TRUE);
2738                         }
2739                         break;
2740                 case OP_LDIV_UN:
2741                 case OP_LREM_UN:
2742                         if (ins->sreg2 == AMD64_RDX) {
2743                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
2744                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2745                                 amd64_div_membase (code, AMD64_RSP, -8, FALSE);
2746                         } else {
2747                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2748                                 amd64_div_reg (code, ins->sreg2, FALSE);
2749                         }
2750                         break;
2751                 case OP_IDIV:
2752                 case OP_IREM:
2753                         if (ins->sreg2 == AMD64_RDX) {
2754                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
2755                                 amd64_cdq_size (code, 4);
2756                                 amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
2757                         } else {
2758                                 amd64_cdq_size (code, 4);
2759                                 amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
2760                         }
2761                         break;
2762                 case OP_IDIV_UN:
2763                 case OP_IREM_UN:
2764                         if (ins->sreg2 == AMD64_RDX) {
2765                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
2766                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2767                                 amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
2768                         } else {
2769                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2770                                 amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
2771                         }
2772                         break;
2773                 case OP_LMUL_OVF:
2774                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2775                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2776                         break;
2777                 case OP_LOR:
2778                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2779                         break;
2780                 case OP_OR_IMM:
2781                 case OP_LOR_IMM:
2782                         g_assert (amd64_is_imm32 (ins->inst_imm));
2783                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2784                         break;
2785                 case OP_LXOR:
2786                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2787                         break;
2788                 case OP_XOR_IMM:
2789                 case OP_LXOR_IMM:
2790                         g_assert (amd64_is_imm32 (ins->inst_imm));
2791                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2792                         break;
2793                 case OP_LSHL:
2794                         g_assert (ins->sreg2 == AMD64_RCX);
2795                         amd64_shift_reg (code, X86_SHL, ins->dreg);
2796                         break;
2797                 case OP_LSHR:
2798                         g_assert (ins->sreg2 == AMD64_RCX);
2799                         amd64_shift_reg (code, X86_SAR, ins->dreg);
2800                         break;
2801                 case OP_SHR_IMM:
2802                         g_assert (amd64_is_imm32 (ins->inst_imm));
2803                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2804                         break;
2805                 case OP_LSHR_IMM:
2806                         g_assert (amd64_is_imm32 (ins->inst_imm));
2807                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2808                         break;
2809                 case OP_SHR_UN_IMM:
2810                         g_assert (amd64_is_imm32 (ins->inst_imm));
2811                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2812                         break;
2813                 case OP_LSHR_UN_IMM:
2814                         g_assert (amd64_is_imm32 (ins->inst_imm));
2815                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2816                         break;
2817                 case OP_LSHR_UN:
2818                         g_assert (ins->sreg2 == AMD64_RCX);
2819                         amd64_shift_reg (code, X86_SHR, ins->dreg);
2820                         break;
2821                 case OP_SHL_IMM:
2822                         g_assert (amd64_is_imm32 (ins->inst_imm));
2823                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2824                         break;
2825                 case OP_LSHL_IMM:
2826                         g_assert (amd64_is_imm32 (ins->inst_imm));
2827                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2828                         break;
2829
2830                 case OP_IADDCC:
2831                 case OP_IADD:
2832                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
2833                         break;
2834                 case OP_IADC:
2835                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
2836                         break;
2837                 case OP_IADD_IMM:
2838                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
2839                         break;
2840                 case OP_IADC_IMM:
2841                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
2842                         break;
2843                 case OP_ISUBCC:
2844                 case OP_ISUB:
2845                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
2846                         break;
2847                 case OP_ISBB:
2848                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
2849                         break;
2850                 case OP_ISUB_IMM:
2851                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
2852                         break;
2853                 case OP_ISBB_IMM:
2854                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
2855                         break;
2856                 case OP_IAND:
2857                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
2858                         break;
2859                 case OP_IAND_IMM:
2860                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
2861                         break;
2862                 case OP_IOR:
2863                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
2864                         break;
2865                 case OP_IOR_IMM:
2866                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
2867                         break;
2868                 case OP_IXOR:
2869                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
2870                         break;
2871                 case OP_IXOR_IMM:
2872                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
2873                         break;
2874                 case OP_INEG:
2875                         amd64_neg_reg_size (code, ins->sreg1, 4);
2876                         break;
2877                 case OP_INOT:
2878                         amd64_not_reg_size (code, ins->sreg1, 4);
2879                         break;
2880                 case OP_ISHL:
2881                         g_assert (ins->sreg2 == AMD64_RCX);
2882                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
2883                         break;
2884                 case OP_ISHR:
2885                         g_assert (ins->sreg2 == AMD64_RCX);
2886                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
2887                         break;
2888                 case OP_ISHR_IMM:
2889                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2890                         break;
2891                 case OP_ISHR_UN_IMM:
2892                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2893                         break;
2894                 case OP_ISHR_UN:
2895                         g_assert (ins->sreg2 == AMD64_RCX);
2896                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2897                         break;
2898                 case OP_ISHL_IMM:
2899                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2900                         break;
2901                 case OP_IMUL:
2902                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2903                         break;
2904                 case OP_IMUL_OVF:
2905                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2906                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2907                         break;
2908                 case OP_IMUL_OVF_UN:
2909                 case OP_LMUL_OVF_UN: {
2910                         /* the mul operation and the exception check should most likely be split */
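                        /*
                         * Unsigned MUL takes one operand implicitly in RAX and writes
                         * the high half of the product to RDX, hence the shuffling below
                         * to route the operands and preserve whichever of RAX/RDX is
                         * live. MUL sets OF/CF exactly when the high half is non-zero,
                         * which is precisely the overflow condition.
                         */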
2911                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2912                         int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
2913                         /*g_assert (ins->sreg2 == X86_EAX);
2914                         g_assert (ins->dreg == X86_EAX);*/
2915                         if (ins->sreg2 == X86_EAX) {
2916                                 non_eax_reg = ins->sreg1;
2917                         } else if (ins->sreg1 == X86_EAX) {
2918                                 non_eax_reg = ins->sreg2;
2919                         } else {
2920                                 /* EAX only needs saving when it is not the destination, since we store to it anyway */
2921                                 if (ins->dreg != X86_EAX) {
2922                                         saved_eax = TRUE;
2923                                         amd64_push_reg (code, X86_EAX);
2924                                 }
2925                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
2926                                 non_eax_reg = ins->sreg2;
2927                         }
2928                         if (ins->dreg == X86_EDX) {
2929                                 if (!saved_eax) {
2930                                         saved_eax = TRUE;
2931                                         amd64_push_reg (code, X86_EAX);
2932                                 }
2933                         } else {
2934                                 saved_edx = TRUE;
2935                                 amd64_push_reg (code, X86_EDX);
2936                         }
2937                         amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
2938                         /* save before the check since pop and mov don't change the flags */
2939                         if (ins->dreg != X86_EAX)
2940                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
2941                         if (saved_edx)
2942                                 amd64_pop_reg (code, X86_EDX);
2943                         if (saved_eax)
2944                                 amd64_pop_reg (code, X86_EAX);
2945                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2946                         break;
2947                 }
2948                 case OP_ICOMPARE:
2949                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
2950                         break;
2951                 case OP_ICOMPARE_IMM:
2952                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
2953                         break;
2954                 case OP_IBEQ:
2955                 case OP_IBLT:
2956                 case OP_IBGT:
2957                 case OP_IBGE:
2958                 case OP_IBLE:
2959                 case OP_LBEQ:
2960                 case OP_LBLT:
2961                 case OP_LBGT:
2962                 case OP_LBGE:
2963                 case OP_LBLE:
2964                 case OP_IBNE_UN:
2965                 case OP_IBLT_UN:
2966                 case OP_IBGT_UN:
2967                 case OP_IBGE_UN:
2968                 case OP_IBLE_UN:
2969                 case OP_LBNE_UN:
2970                 case OP_LBLT_UN:
2971                 case OP_LBGT_UN:
2972                 case OP_LBGE_UN:
2973                 case OP_LBLE_UN:
2974                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2975                         break;
2976
2977                 case OP_LNOT:
2978                         amd64_not_reg (code, ins->sreg1);
2979                         break;
2980                 case OP_LNEG:
2981                         amd64_neg_reg (code, ins->sreg1);
2982                         break;
2983
2984                 case OP_ICONST:
2985                 case OP_I8CONST:
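                        /*
                         * A 4 byte MOV zero-extends into the full 64 bit register, so
                         * constants with a clear upper half can use the shorter
                         * encoding.
                         */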
2986                         if ((((guint64)ins->inst_c0) >> 32) == 0)
2987                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
2988                         else
2989                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
2990                         break;
2991                 case OP_AOTCONST:
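                        /* The constant is loaded RIP-relative; the zero displacement is fixed up when the patch is applied. */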
2992                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2993                         amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8);
2994                         break;
2995                 case OP_MOVE:
2996                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
2997                         break;
2998                 case OP_AMD64_SET_XMMREG_R4: {
2999                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
3000                         break;
3001                 }
3002                 case OP_AMD64_SET_XMMREG_R8: {
3003                         if (ins->dreg != ins->sreg1)
3004                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
3005                         break;
3006                 }
3007                 case OP_JMP: {
3008                         /*
3009                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3010                          * Keep in sync with the code in emit_epilog.
3011                          */
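                        /*
                         * Two epilogue shapes: with omit_fp the callee-saved registers
                         * live at known offsets from RSP, so reload them and release the
                         * frame with a single ADD; otherwise restore RSP from RBP, pop
                         * the saved registers in reverse order and LEAVE.
                         */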
3012                         int pos = 0, i;
3013
3014                         /* FIXME: no tracing support... */
3015                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3016                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3017
3018                         g_assert (!cfg->method->save_lmf);
3019
3020                         code = emit_load_volatile_arguments (cfg, code);
3021
3022                         if (cfg->arch.omit_fp) {
3023                                 guint32 save_offset = 0;
3024                                 /* Pop callee-saved registers */
3025                                 for (i = 0; i < AMD64_NREG; ++i)
3026                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3027                                                 amd64_mov_reg_membase (code, i, AMD64_RSP, save_offset, 8);
3028                                                 save_offset += 8;
3029                                         }
3030                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
3031                         }
3032                         else {
3033                                 for (i = 0; i < AMD64_NREG; ++i)
3034                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
3035                                                 pos -= sizeof (gpointer);
3036                         
3037                                 if (pos)
3038                                         amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
3039
3040                                 /* Pop registers in reverse order */
3041                                 for (i = AMD64_NREG - 1; i > 0; --i)
3042                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3043                                                 amd64_pop_reg (code, i);
3044                                         }
3045
3046                                 amd64_leave (code);
3047                         }
3048
3049                         offset = code - cfg->native_code;
3050                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3051                         if (cfg->compile_aot)
3052                                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
3053                         else
3054                                 amd64_set_reg_template (code, AMD64_R11);
3055                         amd64_jump_reg (code, AMD64_R11);
3056                         break;
3057                 }
3058                 case OP_CHECK_THIS:
3059                         /* ensure ins->sreg1 is not NULL */
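                        /* A fault here on a NULL 'this' is mapped to a NullReferenceException by the runtime's signal handler. */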
3060                         amd64_alu_membase_imm_size (code, X86_CMP, ins->sreg1, 0, 0, 4);
3061                         break;
3062                 case OP_ARGLIST: {
3063                         amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie);
3064                         amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8);
3065                         break;
3066                 }
3067                 case OP_FCALL:
3068                 case OP_LCALL:
3069                 case OP_VCALL:
3070                 case OP_VOIDCALL:
3071                 case OP_CALL:
3072                         call = (MonoCallInst*)ins;
3073                         /*
3074                          * The AMD64 ABI forces callers to know about varargs.
3075                          */
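                        /*
                         * Concretely, AL must hold the number of SSE registers used for
                         * the arguments; zeroing RAX declares that none are used.
                         */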
3076                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
3077                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3078                         else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
3079                                 /* 
3080                                  * Since the unmanaged calling convention doesn't contain a 
3081                                  * 'vararg' entry, we have to treat every pinvoke call as a
3082                                  * potential vararg call.
3083                                  */
3084                                 guint32 nregs, i;
3085                                 nregs = 0;
3086                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
3087                                         if (call->used_fregs & (1 << i))
3088                                                 nregs ++;
3089                                 if (!nregs)
3090                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3091                                 else
3092                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
3093                         }
3094
3095                         if (ins->flags & MONO_INST_HAS_METHOD)
3096                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE);
3097                         else
3098                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE);
3099                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3100                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3101                         code = emit_move_return_value (cfg, ins, code);
3102                         break;
3103                 case OP_FCALL_REG:
3104                 case OP_LCALL_REG:
3105                 case OP_VCALL_REG:
3106                 case OP_VOIDCALL_REG:
3107                 case OP_CALL_REG:
3108                         call = (MonoCallInst*)ins;
3109
3110                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3111                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3112                                 ins->sreg1 = AMD64_R11;
3113                         }
3114
3115                         /*
3116                          * The AMD64 ABI forces callers to know about varargs.
3117                          */
3118                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
3119                                 if (ins->sreg1 == AMD64_RAX) {
3120                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
3121                                         ins->sreg1 = AMD64_R11;
3122                                 }
3123                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3124                         }
3125                         amd64_call_reg (code, ins->sreg1);
3126                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3127                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3128                         code = emit_move_return_value (cfg, ins, code);
3129                         break;
3130                 case OP_FCALL_MEMBASE:
3131                 case OP_LCALL_MEMBASE:
3132                 case OP_VCALL_MEMBASE:
3133                 case OP_VOIDCALL_MEMBASE:
3134                 case OP_CALL_MEMBASE:
3135                         call = (MonoCallInst*)ins;
3136
3137                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3138                                 /* 
3139                                  * Can't use R11 because it is clobbered by the trampoline 
3140                                  * code, and the reg value is needed by get_vcall_slot_addr.
3141                                  */
3142                                 amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
3143                                 ins->sreg1 = AMD64_RAX;
3144                         }
3145
3146                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
3147                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3148                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3149                         code = emit_move_return_value (cfg, ins, code);
3150                         break;
3151                 case OP_AMD64_SAVE_SP_TO_LMF:
3152                         amd64_mov_membase_reg (code, cfg->frame_reg, cfg->arch.lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
3153                         break;
3154                 case OP_OUTARG:
3155                 case OP_X86_PUSH:
3156                         amd64_push_reg (code, ins->sreg1);
3157                         break;
3158                 case OP_X86_PUSH_IMM:
3159                         g_assert (amd64_is_imm32 (ins->inst_imm));
3160                         amd64_push_imm (code, ins->inst_imm);
3161                         break;
3162                 case OP_X86_PUSH_MEMBASE:
3163                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
3164                         break;
3165                 case OP_X86_PUSH_OBJ: 
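                        /*
                         * Push a valuetype: reserve space on the stack, then block-copy
                         * the object 8 bytes at a time with REP MOVS (RCX = size / 8).
                         * RDI/RSI/RCX are saved first since REP MOVS clobbers them; the
                         * 3 * 8 LEA skips those three saved registers to reach the newly
                         * reserved area.
                         */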
3166                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
3167                         amd64_push_reg (code, AMD64_RDI);
3168                         amd64_push_reg (code, AMD64_RSI);
3169                         amd64_push_reg (code, AMD64_RCX);
3170                         if (ins->inst_offset)
3171                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
3172                         else
3173                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
3174                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
3175                         amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
3176                         amd64_cld (code);
3177                         amd64_prefix (code, X86_REP_PREFIX);
3178                         amd64_movsd (code);
3179                         amd64_pop_reg (code, AMD64_RCX);
3180                         amd64_pop_reg (code, AMD64_RSI);
3181                         amd64_pop_reg (code, AMD64_RDI);
3182                         break;
3183                 case OP_X86_LEA:
3184                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
3185                         break;
3186                 case OP_X86_LEA_MEMBASE:
3187                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3188                         break;
3189                 case OP_X86_XCHG:
3190                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3191                         break;
3192                 case OP_LOCALLOC:
3193                         /* keep alignment */
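                        /* Round up to the frame alignment: (n + align - 1) & ~(align - 1). */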
3194                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3195                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3196                         code = mono_emit_stack_alloc (code, ins);
3197                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3198                         break;
3199                 case OP_LOCALLOC_IMM: {
3200                         guint32 size = ins->inst_imm;
3201                         size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
3202
3203                         if (ins->flags & MONO_INST_INIT) {
3204                                 /* FIXME: Optimize this */
3205                                 amd64_mov_reg_imm (code, ins->dreg, size);
3206                                 ins->sreg1 = ins->dreg;
3207
3208                                 code = mono_emit_stack_alloc (code, ins);
3209                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3210                         } else {
3211                                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
3212                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3213                         }
3214                         break;
3215                 }
3216                 case OP_THROW: {
3217                         amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
3218                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3219                                              (gpointer)"mono_arch_throw_exception", FALSE);
3220                         break;
3221                 }
3222                 case OP_RETHROW: {
3223                         amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
3224                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3225                                              (gpointer)"mono_arch_rethrow_exception", FALSE);
3226                         break;
3227                 }
3228                 case OP_CALL_HANDLER: 
3229                         /* Align stack */
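                        /*
                         * The CALL pushes an 8 byte return address; reserving another
                         * 8 bytes first keeps RSP 16 byte aligned inside the handler.
                         */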
3230                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
3231                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3232                         amd64_call_imm (code, 0);
3233                         /* Restore stack alignment */
3234                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3235                         break;
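                /*
                 * Handler blocks run on the current frame: OP_START_HANDLER stashes
                 * the RSP it was entered with in the spvar slot, and ENDFINALLY /
                 * ENDFILTER restore it before RET, so control returns to the
                 * OP_CALL_HANDLER site even if the handler body moved the stack
                 * pointer.
                 */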
3236                 case OP_START_HANDLER: {
3237                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3238                         amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, 8);
3239                         break;
3240                 }
3241                 case OP_ENDFINALLY: {
3242                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3243                         amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8);
3244                         amd64_ret (code);
3245                         break;
3246                 }
3247                 case OP_ENDFILTER: {
3248                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3249                         amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8);
3250                         /* The local allocator will put the result into RAX */
3251                         amd64_ret (code);
3252                         break;
3253                 }
3254
3255                 case OP_LABEL:
3256                         ins->inst_c0 = code - cfg->native_code;
3257                         break;
3258                 case OP_BR:
3259                         if (ins->flags & MONO_INST_BRLABEL) {
3260                                 if (ins->inst_i0->inst_c0) {
3261                                         amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3262                                 } else {
3263                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3264                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3265                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3266                                                 x86_jump8 (code, 0);
3267                                         else 
3268                                                 x86_jump32 (code, 0);
3269                                 }
3270                         } else {
3271                                 if (ins->inst_target_bb->native_offset) {
3272                                         amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3273                                 } else {
3274                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3275                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3276                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3277                                                 x86_jump8 (code, 0);
3278                                         else 
3279                                                 x86_jump32 (code, 0);
3280                                 } 
3281                         }
3282                         break;
3283                 case OP_BR_REG:
3284                         amd64_jump_reg (code, ins->sreg1);
3285                         break;
3286                 case OP_CEQ:
3287                 case OP_LCEQ:
3288                 case OP_ICEQ:
3289                 case OP_CLT:
3290                 case OP_LCLT:
3291                 case OP_ICLT:
3292                 case OP_CGT:
3293                 case OP_ICGT:
3294                 case OP_LCGT:
3295                 case OP_CLT_UN:
3296                 case OP_LCLT_UN:
3297                 case OP_ICLT_UN:
3298                 case OP_CGT_UN:
3299                 case OP_LCGT_UN:
3300                 case OP_ICGT_UN:
3301                         amd64_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
3302                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3303                         break;
3304                 case OP_COND_EXC_EQ:
3305                 case OP_COND_EXC_NE_UN:
3306                 case OP_COND_EXC_LT:
3307                 case OP_COND_EXC_LT_UN:
3308                 case OP_COND_EXC_GT:
3309                 case OP_COND_EXC_GT_UN:
3310                 case OP_COND_EXC_GE:
3311                 case OP_COND_EXC_GE_UN:
3312                 case OP_COND_EXC_LE:
3313                 case OP_COND_EXC_LE_UN:
3314                 case OP_COND_EXC_IEQ:
3315                 case OP_COND_EXC_INE_UN:
3316                 case OP_COND_EXC_ILT:
3317                 case OP_COND_EXC_ILT_UN:
3318                 case OP_COND_EXC_IGT:
3319                 case OP_COND_EXC_IGT_UN:
3320                 case OP_COND_EXC_IGE:
3321                 case OP_COND_EXC_IGE_UN:
3322                 case OP_COND_EXC_ILE:
3323                 case OP_COND_EXC_ILE_UN:
3324                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
3325                         break;
3326                 case OP_COND_EXC_OV:
3327                 case OP_COND_EXC_NO:
3328                 case OP_COND_EXC_C:
3329                 case OP_COND_EXC_NC:
3330                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3331                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3332                         break;
3333                 case OP_COND_EXC_IOV:
3334                 case OP_COND_EXC_INO:
3335                 case OP_COND_EXC_IC:
3336                 case OP_COND_EXC_INC:
3337                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], 
3338                                                     (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
3339                         break;
3340
3341                 /* floating point opcodes */
3342                 case OP_R8CONST: {
3343                         double d = *(double *)ins->inst_p0;
3344
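                        /* Only +0.0 can be materialized with xorpd; -0.0 has the sign bit set (hence the signbit check) and is loaded from memory like any other constant. */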
3345                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3346                                 amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
3347                         }
3348                         else {
3349                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
3350                                 amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
3351                         }
3352                         break;
3353                 }
3354                 case OP_R4CONST: {
3355                         float f = *(float *)ins->inst_p0;
3356
3357                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3358                                 amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
3359                         }
3360                         else {
3361                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
3362                                 amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
3363                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
3364                         }
3365                         break;
3366                 }
3367                 case OP_STORER8_MEMBASE_REG:
3368                         amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
3369                         break;
3370                 case OP_LOADR8_SPILL_MEMBASE:
3371                         g_assert_not_reached ();
3372                         break;
3373                 case OP_LOADR8_MEMBASE:
3374                         amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
3375                         break;
3376                 case OP_STORER4_MEMBASE_REG:
3377                         /* This requires a double->single conversion */
3378                         amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1);
3379                         amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15);
3380                         break;
3381                 case OP_LOADR4_MEMBASE:
3382                         amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
3383                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
3384                         break;
3385                 case OP_ICONV_TO_R4: /* FIXME: change precision */
3386                 case OP_ICONV_TO_R8:
3387                         amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
3388                         break;
3389                 case OP_LCONV_TO_R4: /* FIXME: change precision */
3390                 case OP_LCONV_TO_R8:
3391                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
3392                         break;
3393                 case OP_FCONV_TO_R4:
3394                         /* FIXME: nothing to do?? R4 values are kept as R8 in the xmm registers, so the narrowing to single precision is skipped here. */
3395                         break;
3396                 case OP_FCONV_TO_I1:
3397                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
3398                         break;
3399                 case OP_FCONV_TO_U1:
3400                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
3401                         break;
3402                 case OP_FCONV_TO_I2:
3403                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
3404                         break;
3405                 case OP_FCONV_TO_U2:
3406                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
3407                         break;
3408                 case OP_FCONV_TO_U4:
3409                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);                  
3410                         break;
3411                 case OP_FCONV_TO_I4:
3412                 case OP_FCONV_TO_I:
3413                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
3414                         break;
3415                 case OP_FCONV_TO_I8:
3416                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
3417                         break;
3418                 case OP_LCONV_TO_R_UN: { 
3419                         guint8 *br [2];
3420
3421                         /* Based on gcc code */
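                        /*
                         * cvtsi2sd treats its source as signed, so a value with the top
                         * bit set needs help: halve it (shift right by one, OR-ing the
                         * lost low bit back in so rounding stays correct), convert, then
                         * double the result with addsd.
                         */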
3422                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3423                         br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE);
3424
3425                         /* Positive case */
3426                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
3427                         br [1] = code; x86_jump8 (code, 0);
3428                         amd64_patch (br [0], code);
3429
3430                         /* Negative case */
3431                         /* Save to the red zone */
3432                         amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
3433                         amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
3434                         amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8);
3435                         amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
3436                         amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1);
3437                         amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1);
3438                         amd64_alu_reg_reg (code, X86_OR, AMD64_RAX, AMD64_RCX);
3439                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX);
3440                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg);
3441                         /* Restore */
3442                         amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
3443                         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8);
3444                         amd64_patch (br [1], code);
3445                         break;
3446                 }
3447                 case OP_LCONV_TO_OVF_U4:
3448                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
3449                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
3450                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3451                         break;
3452                 case OP_LCONV_TO_OVF_I4_UN:
3453                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
3454                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
3455                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3456                         break;
3457                 case OP_FMOVE:
3458                         if (ins->dreg != ins->sreg1)
3459                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
3460                         break;
3461                 case OP_FADD:
3462                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
3463                         break;
3464                 case OP_FSUB:
3465                         amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
3466                         break;          
3467                 case OP_FMUL:
3468                         amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
3469                         break;          
3470                 case OP_FDIV:
3471                         amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
3472                         break;          
3473                 case OP_FNEG: {
3474                         static double r8_0 = -0.0;
3475
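                        /*
                         * Negation just flips the sign bit: -0.0 is the
                         * 0x8000000000000000 pattern, XORed in from a RIP-relative slot
                         * (OP_ABS below uses the complementary AND mask).
                         */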
3476                         g_assert (ins->sreg1 == ins->dreg);
3477                                         
3478                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0);
3479                         amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
3480                         break;
3481                 }
3482                 case OP_SIN:
3483                         EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
3484                         break;          
3485                 case OP_COS:
3486                         EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
3487                         break;          
3488                 case OP_ABS: {
3489                         static guint64 d = 0x7fffffffffffffffUL;
3490
3491                         g_assert (ins->sreg1 == ins->dreg);
3492                                         
3493                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &d);
3494                         amd64_sse_andpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
3495                         break;          
3496                 }
3497                 case OP_SQRT:
3498                         EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
3499                         break;
3500                 case OP_IMIN:
3501                         g_assert (cfg->opt & MONO_OPT_CMOV);
3502                         g_assert (ins->dreg == ins->sreg1);
3503                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3504                         amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4);
3505                         break;
3506                 case OP_IMIN_UN:
3507                         g_assert (cfg->opt & MONO_OPT_CMOV);
3508                         g_assert (ins->dreg == ins->sreg1);
3509                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3510                         amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4);
3511                         break;
3512                 case OP_IMAX:
3513                         g_assert (cfg->opt & MONO_OPT_CMOV);
3514                         g_assert (ins->dreg == ins->sreg1);
3515                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3516                         amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4);
3517                         break;
3518                 case OP_IMAX_UN:
3519                         g_assert (cfg->opt & MONO_OPT_CMOV);
3520                         g_assert (ins->dreg == ins->sreg1);
3521                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3522                         amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4);
3523                         break;
3524                 case OP_LMIN:
3525                         g_assert (cfg->opt & MONO_OPT_CMOV);
3526                         g_assert (ins->dreg == ins->sreg1);
3527                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3528                         amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3529                         break;
3530                 case OP_LMIN_UN:
3531                         g_assert (cfg->opt & MONO_OPT_CMOV);
3532                         g_assert (ins->dreg == ins->sreg1);
3533                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3534                         amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3535                         break;
3536                 case OP_LMAX:
3537                         g_assert (cfg->opt & MONO_OPT_CMOV);
3538                         g_assert (ins->dreg == ins->sreg1);
3539                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3540                         amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3541                         break;
3542                 case OP_LMAX_UN:
3543                         g_assert (cfg->opt & MONO_OPT_CMOV);
3544                         g_assert (ins->dreg == ins->sreg1);
3545                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3546                         amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3547                         break;  
3548                 case OP_X86_FPOP:
3549                         break;          
3550                 case OP_FCOMPARE:
3551                         /* 
3552                          * The two arguments are swapped because the fbranch instructions
3553                          * depend on this for the non-sse case to work.
3554                          */
3555                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3556                         break;
3557                 case OP_FCEQ: {
3558                         /* zeroing the register at the start results in 
3559                          * shorter and faster code (we can also remove the widening op)
3560                          */
3561                         guchar *unordered_check;
3562                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3563                         amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
3564                         unordered_check = code;
3565                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3566                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3567                         amd64_patch (unordered_check, code);
3568                         break;
3569                 }
3570                 case OP_FCLT:
3571                 case OP_FCLT_UN:
3572                         /* zeroing the register at the start results in 
3573                          * shorter and faster code (we can also remove the widening op)
3574                          */
3575                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3576                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3577                         if (ins->opcode == OP_FCLT_UN) {
3578                                 guchar *unordered_check = code;
3579                                 guchar *jump_to_end;
3580                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3581                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3582                                 jump_to_end = code;
3583                                 x86_jump8 (code, 0);
3584                                 amd64_patch (unordered_check, code);
3585                                 amd64_inc_reg (code, ins->dreg);
3586                                 amd64_patch (jump_to_end, code);
3587                         } else {
3588                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3589                         }
3590                         break;
3591                 case OP_FCGT:
3592                 case OP_FCGT_UN: {
3593                         /* zeroing the register at the start results in 
3594                          * shorter and faster code (we can also remove the widening op)
3595                          */
3596                         guchar *unordered_check;
3597                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3598                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3599                         if (ins->opcode == OP_FCGT) {
3600                                 unordered_check = code;
3601                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3602                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3603                                 amd64_patch (unordered_check, code);
3604                         } else {
3605                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3606                         }
3607                         break;
3608                 }
3609                 case OP_FCLT_MEMBASE:
3610                 case OP_FCGT_MEMBASE:
3611                 case OP_FCLT_UN_MEMBASE:
3612                 case OP_FCGT_UN_MEMBASE:
3613                 case OP_FCEQ_MEMBASE: {
3614                         guchar *unordered_check, *jump_to_end;
3615                         int x86_cond;
3616
3617                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3618                         amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
3619
3620                         switch (ins->opcode) {
3621                         case OP_FCEQ_MEMBASE:
3622                                 x86_cond = X86_CC_EQ;
3623                                 break;
3624                         case OP_FCLT_MEMBASE:
3625                         case OP_FCLT_UN_MEMBASE:
3626                                 x86_cond = X86_CC_LT;
3627                                 break;
3628                         case OP_FCGT_MEMBASE:
3629                         case OP_FCGT_UN_MEMBASE:
3630                                 x86_cond = X86_CC_GT;
3631                                 break;
3632                         default:
3633                                 g_assert_not_reached ();
3634                         }
3635
3636                         unordered_check = code;
3637                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3638                         amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
3639
3640                         switch (ins->opcode) {
3641                         case OP_FCEQ_MEMBASE:
3642                         case OP_FCLT_MEMBASE:
3643                         case OP_FCGT_MEMBASE:
3644                                 amd64_patch (unordered_check, code);
3645                                 break;
3646                         case OP_FCLT_UN_MEMBASE:
3647                         case OP_FCGT_UN_MEMBASE:
3648                                 jump_to_end = code;
3649                                 x86_jump8 (code, 0);
3650                                 amd64_patch (unordered_check, code);
3651                                 amd64_inc_reg (code, ins->dreg);
3652                                 amd64_patch (jump_to_end, code);
3653                                 break;
3654                         default:
3655                                 break;
3656                         }
3657                         break;
3658                 }
3659                 case OP_FBEQ: {
3660                         guchar *jump = code;
3661                         x86_branch8 (code, X86_CC_P, 0, TRUE);
3662                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3663                         amd64_patch (jump, code);
3664                         break;
3665                 }
3666                 case OP_FBNE_UN:
3667                         /* Branch if C013 != 100 */
3668                         /* branch if !ZF or (PF|CF) */
3669                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3670                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3671                         EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3672                         break;
3673                 case OP_FBLT:
3674                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3675                         break;
3676                 case OP_FBLT_UN:
3677                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3678                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3679                         break;
3680                 case OP_FBGT:
3681                 case OP_FBGT_UN:
3682                         if (ins->opcode == OP_FBGT) {
3683                                 guchar *br1;
3684
3685                                 /* skip branch if C1=1 */
3686                                 br1 = code;
3687                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3688                                 /* branch if (C0 | C3) = 1 */
3689                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3690                                 amd64_patch (br1, code);
3691                                 break;
3692                         } else {
3693                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3694                         }
3695                         break;
3696                 case OP_FBGE: {
3697                         /* Branch if C013 == 100 or 001 */
3698                         guchar *br1;
3699
3700                         /* skip branch if C1=1 */
3701                         br1 = code;
3702                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3703                         /* branch if (C0 | C3) = 1 */
3704                         EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3705                         amd64_patch (br1, code);
3706                         break;
3707                 }
3708                 case OP_FBGE_UN:
3709                         /* Branch if C013 == 000 */
3710                         EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3711                         break;
3712                 case OP_FBLE: {
3713                         /* Branch if C013=000 or 100 */
3714                         guchar *br1;
3715
3716                         /* skip branch if C1=1 */
3717                         br1 = code;
3718                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3719                         /* branch if C0=0 */
3720                         EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3721                         amd64_patch (br1, code);
3722                         break;
3723                 }
3724                 case OP_FBLE_UN:
3725                         /* Branch if C013 != 001 */
3726                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3727                         EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3728                         break;
3729                 case OP_CKFINITE:
3730                         /* Transfer value to the fp stack */
3731                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
3732                         amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
3733                         amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
3734
3735                         amd64_push_reg (code, AMD64_RAX);
3736                         amd64_fxam (code);
3737                         amd64_fnstsw (code);
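                             /*
                              * fxam classifies ST(0) into C3:C2:C0. After fnstsw, C0 is bit 8
                              * (0x0100) and C3 is bit 14 (0x4000) of %ax, so masking with
                              * 0x4100 == (C3|C0) and comparing against C0 holds exactly for
                              * NaN (C3:C2:C0 == 001) and infinity (011), the non-finite cases.
                              */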
3738                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
3739                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3740                         amd64_pop_reg (code, AMD64_RAX);
3741                         amd64_fstp (code, 0);
3742                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3743                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
3744                         break;
3745                 case OP_TLS_GET: {
3746                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3747                         break;
3748                 }
3749                 case OP_MEMORY_BARRIER: {
3750                         /* Not needed on amd64 */
3751                         break;
3752                 }
3753                 case OP_ATOMIC_ADD_I4:
3754                 case OP_ATOMIC_ADD_I8: {
3755                         int dreg = ins->dreg;
3756                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;
3757
3758                         if (dreg == ins->inst_basereg)
3759                                 dreg = AMD64_R11;
3760                         
3761                         if (dreg != ins->sreg2)
3762                                 amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
3763
3764                         x86_prefix (code, X86_LOCK_PREFIX);
3765                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
3766
3767                         if (dreg != ins->dreg)
3768                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
3769
3770                         break;
3771                 }
3772                 case OP_ATOMIC_ADD_NEW_I4:
3773                 case OP_ATOMIC_ADD_NEW_I8: {
3774                         int dreg = ins->dreg;
3775                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_NEW_I4) ? 4 : 8;
3776
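                             /*
                              * Unlike OP_ATOMIC_ADD above, which yields the old value of the
                              * location, OP_ATOMIC_ADD_NEW yields the new one:
                              *
                              *   old = xadd (addr, sreg2);   // lock xadd
                              *   dreg = old + sreg2;
                              */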
3777                         if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
3778                                 dreg = AMD64_R11;
3779
3780                         amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
3781                         amd64_prefix (code, X86_LOCK_PREFIX);
3782                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
3783                         /* dreg contains the old value; add sreg2 to produce the new value */
3784                         amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);
3785                         
3786                         if (ins->dreg != dreg)
3787                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
3788
3789                         break;
3790                 }
3791                 case OP_ATOMIC_EXCHANGE_I4:
3792                 case OP_ATOMIC_EXCHANGE_I8:
3793                 case OP_ATOMIC_CAS_IMM_I4: {
3794                         guchar *br[2];
3795                         int sreg2 = ins->sreg2;
3796                         int breg = ins->inst_basereg;
3797                         guint32 size;
3798                         gboolean need_push = FALSE, rdx_pushed = FALSE;
3799
3800                         if (ins->opcode == OP_ATOMIC_EXCHANGE_I8)
3801                                 size = 8;
3802                         else
3803                                 size = 4;
3804
3805                         /* 
3806                          * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
3807                          * an explanation of how this works.
3808                          */
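                             /*
                              * For the exchange variants the emitted sequence is a classic
                              * cmpxchg retry loop:
                              *
                              *   rax = *addr;
                              * retry:
                              *   if (!lock_cmpxchg (addr, rax, sreg2))
                              *       goto retry;
                              *
                              * On failure cmpxchg reloads %rax with the current value, so the
                              * backwards branch retries with fresh state.
                              */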
3809
3810                         /* cmpxchg uses eax as the comparand, so we need to make sure we can use it;
3811                          * this is a hack to overcome limits in the x86 reg allocator
3812                          * (req: dreg == eax and sreg2 != eax and breg != eax)
3813                          */
3814                         g_assert (ins->dreg == AMD64_RAX);
3815
3816                         if (breg == AMD64_RAX && ins->sreg2 == AMD64_RAX)
3817                                 /* Highly unlikely, but possible */
3818                                 need_push = TRUE;
3819
3820                         /* The pushes invalidate rsp */
3821                         if ((breg == AMD64_RAX) || need_push) {
3822                                 amd64_mov_reg_reg (code, AMD64_R11, breg, 8);
3823                                 breg = AMD64_R11;
3824                         }
3825
3826                         /* We need the EAX reg for the comparand */
3827                         if (ins->sreg2 == AMD64_RAX) {
3828                                 if (breg != AMD64_R11) {
3829                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
3830                                         sreg2 = AMD64_R11;
3831                                 } else {
3832                                         g_assert (need_push);
3833                                         amd64_push_reg (code, AMD64_RDX);
3834                                         amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
3835                                         sreg2 = AMD64_RDX;
3836                                         rdx_pushed = TRUE;
3837                                 }
3838                         }
3839
3840                         if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
3841                                 if (ins->backend.data == NULL)
3842                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3843                                 else
3844                                         amd64_mov_reg_imm (code, AMD64_RAX, ins->backend.data);
3845
3846                                 amd64_prefix (code, X86_LOCK_PREFIX);
3847                                 amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
3848                         } else {
3849                                 amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);
3850
3851                                 br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
3852                                 amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
3853                                 br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
3854                                 amd64_patch (br [1], br [0]);
3855                         }
3856
3857                         if (rdx_pushed)
3858                                 amd64_pop_reg (code, AMD64_RDX);
3859
3860                         break;
3861                 }
3862                 default:
3863                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3864                         g_assert_not_reached ();
3865                 }
3866
3867                 if ((code - cfg->native_code - offset) > max_len) {
3868                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
3869                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3870                         g_assert_not_reached ();
3871                 }
3872                
3873                 cpos += max_len;
3874
3875                 last_offset = offset;
3876         }
3877
3878         cfg->code_len = code - cfg->native_code;
3879 }
3880
3881 void
3882 mono_arch_register_lowlevel_calls (void)
3883 {
3884 }
3885
3886 void
3887 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3888 {
3889         MonoJumpInfo *patch_info;
3890         gboolean compile_aot = !run_cctors;
3891
3892         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3893                 unsigned char *ip = patch_info->ip.i + code;
3894                 unsigned char *target;
3895
3896                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3897
3898                 if (compile_aot) {
3899                         switch (patch_info->type) {
3900                         case MONO_PATCH_INFO_BB:
3901                         case MONO_PATCH_INFO_LABEL:
3902                                 break;
3903                         default:
3904                                 /* No need to patch these */
3905                                 continue;
3906                         }
3907                 }
3908
3909                 switch (patch_info->type) {
3910                 case MONO_PATCH_INFO_NONE:
3911                         continue;
3912                 case MONO_PATCH_INFO_METHOD_REL:
3913                 case MONO_PATCH_INFO_R8:
3914                 case MONO_PATCH_INFO_R4:
3915                         g_assert_not_reached ();
3916                         continue;
3917                 case MONO_PATCH_INFO_BB:
3918                         break;
3919                 default:
3920                         break;
3921                 }
3922
3923                 /* 
3924                  * Debug code to help track down problems where the target of a near call
3925                  * is not valid.
3926                  */
3927                 if (amd64_is_near_call (ip)) {
3928                         gint64 disp = (guint8*)target - (guint8*)ip;
3929
3930                         if (!amd64_is_imm32 (disp)) {
3931                                 printf ("TYPE: %d\n", patch_info->type);
3932                                 switch (patch_info->type) {
3933                                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3934                                         printf ("V: %s\n", patch_info->data.name);
3935                                         break;
3936                                 case MONO_PATCH_INFO_METHOD_JUMP:
3937                                 case MONO_PATCH_INFO_METHOD:
3938                                         printf ("V: %s\n", patch_info->data.method->name);
3939                                         break;
3940                                 default:
3941                                         break;
3942                                 }
3943                         }
3944                 }
3945
3946                 amd64_patch (ip, (gpointer)target);
3947         }
3948 }
3949
3950 static int
3951 get_max_epilog_size (MonoCompile *cfg)
3952 {
3953         int max_epilog_size = 16;
3954         
3955         if (cfg->method->save_lmf)
3956                 max_epilog_size += 256;
3957         
3958         if (mono_jit_trace_calls != NULL)
3959                 max_epilog_size += 50;
3960
3961         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3962                 max_epilog_size += 50;
3963
3964         max_epilog_size += (AMD64_NREG * 2);
3965
3966         return max_epilog_size;
3967 }
3968
3969 /*
3970  * This macro is used for testing whether the unwinder works correctly at every point
3971  * where an async exception can happen.
3972  */
3973 /* This will generate a SIGSEGV at the given point in the code */
3974 #define async_exc_point(code) do { \
3975     if (mono_inject_async_exc_method && mono_method_desc_full_match (mono_inject_async_exc_method, cfg->method)) { \
3976          if (cfg->arch.async_point_count == mono_inject_async_exc_pos) \
3977              amd64_mov_reg_mem (code, AMD64_RAX, 0, 4); \
3978          cfg->arch.async_point_count ++; \
3979     } \
3980 } while (0)
3981
3982 guint8 *
3983 mono_arch_emit_prolog (MonoCompile *cfg)
3984 {
3985         MonoMethod *method = cfg->method;
3986         MonoBasicBlock *bb;
3987         MonoMethodSignature *sig;
3988         MonoInst *ins;
3989         int alloc_size, pos, max_offset, i, quad, max_epilog_size;
3990         guint8 *code;
3991         CallInfo *cinfo;
3992         gint32 lmf_offset = cfg->arch.lmf_offset;
3993         gboolean args_clobbered = FALSE;
3994         gboolean trace = FALSE;
3995
3996         cfg->code_size = MAX (((MonoMethodNormal *)method)->header->code_size * 4, 10240);
3997
3998         code = cfg->native_code = g_malloc (cfg->code_size);
3999
4000         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4001                 trace = TRUE;
4002
4003         /* Amount of stack space allocated by register saving code */
4004         pos = 0;
4005
4006         /* 
4007          * The prolog consists of the following parts:
4008          * FP present:
4009          * - push rbp, mov rbp, rsp
4010          * - save callee saved regs using pushes
4011          * - allocate frame
4012          * - save rgctx if needed
4013          * - save lmf if needed
4014          * FP not present:
4015          * - allocate frame
4016          * - save rgctx if needed
4017          * - save lmf if needed
4018          * - save callee saved regs using moves
4019          */
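             /*
              * As an illustration (a sketch only, the exact layout depends on the
              * flags computed below), an FP-based prolog for a method using %rbx
              * might look like:
              *
              *   push %rbp
              *   mov  %rsp, %rbp
              *   push %rbx
              *   sub  $0x18, %rsp      ; alloc_size, keeps %rsp 16 byte aligned
              */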
4020
4021         async_exc_point (code);
4022
4023         if (!cfg->arch.omit_fp) {
4024                 amd64_push_reg (code, AMD64_RBP);
4025                 async_exc_point (code);
4026                 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
4027                 async_exc_point (code);
4028         }
4029
4030         /* Save callee saved registers */
4031         if (!cfg->arch.omit_fp && !method->save_lmf) {
4032                 for (i = 0; i < AMD64_NREG; ++i)
4033                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4034                                 amd64_push_reg (code, i);
4035                                 pos += sizeof (gpointer);
4036                                 async_exc_point (code);
4037                         }
4038         }
4039
4040         if (cfg->arch.omit_fp) {
4041                 /* 
4042                  * On enter, the stack is misaligned by the pushing of the return
4043                  * address. It is either made aligned by the pushing of %rbp, or by
4044                  * this.
4045                  */
4046                 alloc_size = ALIGN_TO (cfg->stack_offset, 8);
4047                 if ((alloc_size % 16) == 0)
4048                         alloc_size += 8;
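                     /*
                      * E.g. stack_offset == 24 gives alloc_size == 24 (already == 8 mod 16),
                      * while stack_offset == 16 gives alloc_size == 24 as well: since %rsp
                      * == 8 mod 16 on entry, alloc_size must be == 8 mod 16 for the frame
                      * to end up 16 byte aligned.
                      */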
4049         } else {
4050                 alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
4051
4052                 alloc_size -= pos;
4053         }
4054
4055         cfg->arch.stack_alloc_size = alloc_size;
4056
4057         /* Allocate stack frame */
4058         if (alloc_size) {
4059                 /* See mono_emit_stack_alloc */
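                     /*
                      * Large frames must touch every page in order: the OS grows the
                      * stack via a guard page, and a single sub larger than a page
                      * could jump past it. The test against (%rsp) after each 0x1000
                      * byte step is that probe.
                      */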
4060 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4061                 guint32 remaining_size = alloc_size;
4062                 while (remaining_size >= 0x1000) {
4063                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
4064                         async_exc_point (code);
4065                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
4066                         remaining_size -= 0x1000;
4067                 }
4068                 if (remaining_size) {
4069                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
4070                         async_exc_point (code);
4071                 }
4072 #else
4073                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
4074                 async_exc_point (code);
4075 #endif
4076         }
4077
4078         /* Stack alignment check */
4079 #if 0
4080         {
4081                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
4082                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
4083                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4084                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
4085                 amd64_breakpoint (code);
4086         }
4087 #endif
4088
4089         /* Save LMF */
4090         if (method->save_lmf) {
4091                 /* 
4092                  * The ip field is not set; the exception handling code will obtain it from the stack location pointed to by the sp field.
4093                  */
4094                 /* sp is saved right before calls */
4095                 /* Skip method (only needed for trampoline LMF frames) */
4096                 /* Save callee saved regs */
4097                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
4098                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbp), AMD64_RBP, 8);
4099                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
4100                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
4101                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
4102                 amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
4103         }
4104
4105         /* Save callee saved registers */
4106         if (cfg->arch.omit_fp && !method->save_lmf) {
4107                 gint32 save_area_offset = cfg->arch.reg_save_area_offset;
4108
4109                 /* Save callee saved registers after sp is adjusted */
4110                 /* The registers are saved at the bottom of the frame */
4111                 /* FIXME: Optimize this so the regs are saved at the end of the frame in increasing order */
4112                 for (i = 0; i < AMD64_NREG; ++i)
4113                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4114                                 amd64_mov_membase_reg (code, AMD64_RSP, save_area_offset, i, 8);
4115                                 save_area_offset += 8;
4116                                 async_exc_point (code);
4117                         }
4118         }
4119
4120         /* store runtime generic context */
4121         if (cfg->rgctx_var) {
4122                 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
4123                                 (cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP));
4124
4125                 amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 8);
4126         }
4127
4128         /* compute max_offset in order to use short forward jumps */
4129         max_offset = 0;
4130         max_epilog_size = get_max_epilog_size (cfg);
4131         if (cfg->opt & MONO_OPT_BRANCH) {
4132                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4133                         bb->max_offset = max_offset;
4134
4135                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4136                                 max_offset += 6;
4137                         /* max alignment for loops */
4138                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4139                                 max_offset += LOOP_ALIGNMENT;
4140
4141                         MONO_BB_FOR_EACH_INS (bb, ins) {
4142                                 if (ins->opcode == OP_LABEL)
4143                                         ins->inst_c1 = max_offset;
4144                                 
4145                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
4146                         }
4147
4148                         if (mono_jit_trace_calls && bb == cfg->bb_exit)
4149                                 /* The tracing code can be quite large */
4150                                 max_offset += max_epilog_size;
4151                 }
4152         }
4153
4154         sig = mono_method_signature (method);
4155         pos = 0;
4156
4157         cinfo = cfg->arch.cinfo;
4158
4159         if (sig->ret->type != MONO_TYPE_VOID) {
4160                 /* Save volatile arguments to the stack */
4161                 if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
4162                         amd64_mov_membase_reg (code, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, cinfo->ret.reg, 8);
4163         }
4164
4165         /* Keep this in sync with emit_load_volatile_arguments */
4166         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4167                 ArgInfo *ainfo = cinfo->args + i;
4168                 gint32 stack_offset;
4169                 MonoType *arg_type;
4170
4171                 ins = cfg->args [i];
4172
4173                 if ((ins->flags & MONO_INST_IS_DEAD) && !trace)
4174                         /* Unused arguments */
4175                         continue;
4176
4177                 if (sig->hasthis && (i == 0))
4178                         arg_type = &mono_defaults.object_class->byval_arg;
4179                 else
4180                         arg_type = sig->params [i - sig->hasthis];
4181
4182                 stack_offset = ainfo->offset + ARGS_OFFSET;
4183
4184                 /* Save volatile arguments to the stack */
4185                 if (ins->opcode != OP_REGVAR) {
4186                         switch (ainfo->storage) {
4187                         case ArgInIReg: {
4188                                 guint32 size = 8;
4189
4190                                 /* FIXME: I1 etc */
4191                                 /*
4192                                 if (stack_offset & 0x1)
4193                                         size = 1;
4194                                 else if (stack_offset & 0x2)
4195                                         size = 2;
4196                                 else if (stack_offset & 0x4)
4197                                         size = 4;
4198                                 else
4199                                         size = 8;
4200                                 */
4201                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, size);
4202                                 break;
4203                         }
4204                         case ArgInFloatSSEReg:
4205                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
4206                                 break;
4207                         case ArgInDoubleSSEReg:
4208                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
4209                                 break;
4210                         case ArgValuetypeInReg:
4211                                 for (quad = 0; quad < 2; quad ++) {
4212                                         switch (ainfo->pair_storage [quad]) {
4213                                         case ArgInIReg:
4214                                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer));
4215                                                 break;
4216                                         case ArgInFloatSSEReg:
4217                                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
4218                                                 break;
4219                                         case ArgInDoubleSSEReg:
4220                                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
4221                                                 break;
4222                                         case ArgNone:
4223                                                 break;
4224                                         default:
4225                                                 g_assert_not_reached ();
4226                                         }
4227                                 }
4228                                 break;
4229                         default:
4230                                 break;
4231                         }
4232                 } else {
4233                         /* Argument allocated to (non-volatile) register */
4234                         switch (ainfo->storage) {
4235                         case ArgInIReg:
4236                                 amd64_mov_reg_reg (code, ins->dreg, ainfo->reg, 8);
4237                                 break;
4238                         case ArgOnStack:
4239                                 amd64_mov_reg_membase (code, ins->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
4240                                 break;
4241                         default:
4242                                 g_assert_not_reached ();
4243                         }
4244                 }
4245         }
4246
4247         /* Might need to attach the thread to the JIT or change the domain for the callback */
4248         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4249                 guint64 domain = (guint64)cfg->domain;
4250
4251                 args_clobbered = TRUE;
4252
4253                 /* 
4254                  * The call might clobber argument registers, but they are already
4255                  * saved to the stack/global regs.
4256                  */
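                     /*
                      * The fast path below is, in pseudo-C:
                      *
                      *   if (tls_appdomain == cfg->domain && tls_lmf_addr != NULL)
                      *       ; // thread already attached, skip the call
                      *   else
                      *       mono_jit_thread_attach (...);
                      */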
4257                 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4258                         guint8 *buf, *no_domain_branch;
4259
4260                         code = emit_tls_get (code, AMD64_RAX, appdomain_tls_offset);
4261                         if ((domain >> 32) == 0)
4262                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
4263                         else
4264                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
4265                         amd64_alu_reg_reg (code, X86_CMP, AMD64_RAX, AMD64_ARG_REG1);
4266                         no_domain_branch = code;
4267                         x86_branch8 (code, X86_CC_NE, 0, 0);
4268                         code = emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset);
4269                         amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
4270                         buf = code;
4271                         x86_branch8 (code, X86_CC_NE, 0, 0);
4272                         amd64_patch (no_domain_branch, code);
4273                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4274                                           (gpointer)"mono_jit_thread_attach", TRUE);
4275                         amd64_patch (buf, code);
4276                 } else {
4277                         g_assert (!cfg->compile_aot);
4278                         if ((domain >> 32) == 0)
4279                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 4);
4280                         else
4281                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG1, domain, 8);
4282                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
4283                                           (gpointer)"mono_jit_thread_attach", TRUE);
4284                 }
4285         }
4286
4287         if (method->save_lmf) {
4288                 if ((lmf_tls_offset != -1) && !optimize_for_xen) {
4289                         /*
4290                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
4291                          * through the mono_lmf_addr TLS variable.
4292                          */
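                             /*
                              * In pseudo-C, the two stores below push the frame onto the
                              * thread's LMF list without calling mono_get_lmf_addr ():
                              *
                              *   lmf->previous_lmf = mono_lmf;   // TLS load via %fs
                              *   mono_lmf = lmf;                 // TLS store via %fs
                              */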
4293                         /* %rax = previous_lmf */
4294                         x86_prefix (code, X86_FS_PREFIX);
4295                         amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
4296
4297                         /* Save previous_lmf */
4298                         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_RAX, 8);
4299                         /* Set new lmf */
4300                         if (lmf_offset == 0) {
4301                                 x86_prefix (code, X86_FS_PREFIX);
4302                                 amd64_mov_mem_reg (code, lmf_tls_offset, cfg->frame_reg, 8);
4303                         } else {
4304                                 amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset);
4305                                 x86_prefix (code, X86_FS_PREFIX);
4306                                 amd64_mov_mem_reg (code, lmf_tls_offset, AMD64_R11, 8);
4307                         }
4308                 } else {
4309                         if (lmf_addr_tls_offset != -1) {
4310                                 /* Load lmf_addr quickly using the FS register */
4311                                 code = emit_tls_get (code, AMD64_RAX, lmf_addr_tls_offset);
4312                         }
4313                         else {
4314                                 /* 
4315                                  * The call might clobber argument registers, but they are already
4316                                  * saved to the stack/global regs.
4317                                  */
4318                                 args_clobbered = TRUE;
4319                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4320                                                                   (gpointer)"mono_get_lmf_addr", TRUE);         
4321                         }
4322
4323                         /* Save lmf_addr */
4324                         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
4325                         /* Save previous_lmf */
4326                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
4327                         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
4328                         /* Set new lmf */
4329                         amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset);
4330                         amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
4331                 }
4332         }
4333
4334         if (trace) {
4335                 args_clobbered = TRUE;
4336                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4337         }
4338
4339         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4340                 args_clobbered = TRUE;
4341
4342         /*
4343          * Optimize the common case of the first bblock making a call with the same
4344          * arguments as the method. This works because the arguments are still in their
4345          * original argument registers.
4346          * FIXME: Generalize this
4347          */
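        /*
         * For example, given a hypothetical method (illustration only)
         *
         *   static int foo (int x) { return bar (x); }
         *
         * the load of x at the head of the first bblock is nullified, since
         * x is still live in its argument register when bar is called.
         */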
4348         if (!args_clobbered) {
4349                 MonoBasicBlock *first_bb = cfg->bb_entry;
4350                 MonoInst *next;
4351
4352                 next = mono_inst_list_first (&first_bb->ins_list);
4353                 if (!next && first_bb->next_bb) {
4354                         first_bb = first_bb->next_bb;
4355                         next = mono_inst_list_first (&first_bb->ins_list);
4356                 }
4357
4358                 if (first_bb->in_count > 1)
4359                         next = NULL;
4360
4361                 for (i = 0; next && i < sig->param_count + sig->hasthis; ++i) {
4362                         ArgInfo *ainfo = cinfo->args + i;
4363                         gboolean match = FALSE;
4364                         
4365                         ins = cfg->args [i];
4366                         if (ins->opcode != OP_REGVAR) {
4367                                 switch (ainfo->storage) {
4368                                 case ArgInIReg: {
4369                                         if (((next->opcode == OP_LOAD_MEMBASE) || (next->opcode == OP_LOADI4_MEMBASE)) && next->inst_basereg == ins->inst_basereg && next->inst_offset == ins->inst_offset) {
4370                                                 if (next->dreg == ainfo->reg) {
4371                                                         NULLIFY_INS (next);
4372                                                         match = TRUE;
4373                                                 } else {
4374                                                         next->opcode = OP_MOVE;
4375                                                         next->sreg1 = ainfo->reg;
4376                                                         /* Only continue if the instruction doesn't change argument regs */
4377                                                         if (next->dreg == ainfo->reg || next->dreg == AMD64_RAX)
4378                                                                 match = TRUE;
4379                                                 }
4380                                         }
4381                                         break;
4382                                 }
4383                                 default:
4384                                         break;
4385                                 }
4386                         } else {
4387                                 /* Argument allocated to (non-volatile) register */
4388                                 switch (ainfo->storage) {
4389                                 case ArgInIReg:
4390                                         if (next->opcode == OP_MOVE && next->sreg1 == ins->dreg && next->dreg == ainfo->reg) {
4391                                                 NULLIFY_INS (next);
4392                                                 match = TRUE;
4393                                         }
4394                                         break;
4395                                 default:
4396                                         break;
4397                                 }
4398                         }
4399
4400                         if (match) {
4401                                 next = mono_inst_list_next (&next->node, &first_bb->ins_list);
4402                                 if (!next)
4403                                         break;
4404                         }
4405                 }
4406         }
4407
4408         cfg->code_len = code - cfg->native_code;
4409
4410         g_assert (cfg->code_len < cfg->code_size);
4411
4412         return code;
4413 }
4414
4415 void
4416 mono_arch_emit_epilog (MonoCompile *cfg)
4417 {
4418         MonoMethod *method = cfg->method;
4419         int quad, pos, i;
4420         guint8 *code;
4421         int max_epilog_size;
4422         CallInfo *cinfo;
4423         gint32 lmf_offset = cfg->arch.lmf_offset;
4424         
4425         max_epilog_size = get_max_epilog_size (cfg);
4426
4427         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4428                 cfg->code_size *= 2;
4429                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4430                 mono_jit_stats.code_reallocs++;
4431         }
4432
4433         code = cfg->native_code + cfg->code_len;
4434
4435         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4436                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4437
4438         /* the code restoring the registers must be kept in sync with OP_JMP */
4439         pos = 0;
4440         
4441         if (method->save_lmf) {
4442                 if ((lmf_tls_offset != -1) && !optimize_for_xen) {
4443                         /*
4444                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
4445                          * through the mono_lmf_addr TLS variable.
4446                          */
4447                         /* reg = previous_lmf */
4448                         amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
4449                         x86_prefix (code, X86_FS_PREFIX);
4450                         amd64_mov_mem_reg (code, lmf_tls_offset, AMD64_R11, 8);
4451                 } else {
4452                         /* Restore previous lmf */
4453                         amd64_mov_reg_membase (code, AMD64_RCX, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
4454                         amd64_mov_reg_membase (code, AMD64_R11, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
4455                         amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);
4456                 }
4457
4458                 /* Restore callee saved regs */
4459                 if (cfg->used_int_regs & (1 << AMD64_RBP)) {
4460                         amd64_mov_reg_membase (code, AMD64_RBP, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbp), 8);
4461                 }
4462                 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
4463                         amd64_mov_reg_membase (code, AMD64_RBX, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
4464                 }
4465                 if (cfg->used_int_regs & (1 << AMD64_R12)) {
4466                         amd64_mov_reg_membase (code, AMD64_R12, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
4467                 }
4468                 if (cfg->used_int_regs & (1 << AMD64_R13)) {
4469                         amd64_mov_reg_membase (code, AMD64_R13, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
4470                 }
4471                 if (cfg->used_int_regs & (1 << AMD64_R14)) {
4472                         amd64_mov_reg_membase (code, AMD64_R14, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
4473                 }
4474                 if (cfg->used_int_regs & (1 << AMD64_R15)) {
4475                         amd64_mov_reg_membase (code, AMD64_R15, cfg->frame_reg, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
4476                 }
4477         } else {
4478
4479                 if (cfg->arch.omit_fp) {
4480                         gint32 save_area_offset = cfg->arch.reg_save_area_offset;
4481
4482                         for (i = 0; i < AMD64_NREG; ++i)
4483                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4484                                         amd64_mov_reg_membase (code, i, AMD64_RSP, save_area_offset, 8);
4485                                         save_area_offset += 8;
4486                                 }
4487                 }
4488                 else {
4489                         for (i = 0; i < AMD64_NREG; ++i)
4490                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4491                                         pos -= sizeof (gpointer);
4492
4493                         if (pos) {
4494                                 if (pos == - sizeof (gpointer)) {
4495                                         /* Only one register, so avoid lea */
4496                                         for (i = AMD64_NREG - 1; i > 0; --i)
4497                                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4498                                                         amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
4499                                                 }
4500                                 }
4501                                 else {
4502                                         amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4503
4504                                         /* Pop registers in reverse order */
4505                                         for (i = AMD64_NREG - 1; i > 0; --i)
4506                                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4507                                                         amd64_pop_reg (code, i);
4508                                                 }
4509                                 }
4510                         }
4511                 }
4512         }
4513
4514         /* Load returned vtypes into registers if needed */
4515         cinfo = cfg->arch.cinfo;
4516         if (cinfo->ret.storage == ArgValuetypeInReg) {
4517                 ArgInfo *ainfo = &cinfo->ret;
4518                 MonoInst *inst = cfg->ret;
4519
4520                 for (quad = 0; quad < 2; quad ++) {
4521                         switch (ainfo->pair_storage [quad]) {
4522                         case ArgInIReg:
4523                                 amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer));
4524                                 break;
4525                         case ArgInFloatSSEReg:
4526                                 amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
4527                                 break;
4528                         case ArgInDoubleSSEReg:
4529                                 amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
4530                                 break;
4531                         case ArgNone:
4532                                 break;
4533                         default:
4534                                 g_assert_not_reached ();
4535                         }
4536                 }
4537         }
4538
4539         if (cfg->arch.omit_fp) {
4540                 if (cfg->arch.stack_alloc_size)
4541                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
4542         } else {
4543                 amd64_leave (code);
4544         }
4545         async_exc_point (code);
4546         amd64_ret (code);
4547
4548         cfg->code_len = code - cfg->native_code;
4549
4550         g_assert (cfg->code_len < cfg->code_size);
4551
4552         if (cfg->arch.omit_fp) {
4553                 /* 
4554                  * Encode the stack size into used_int_regs so the exception handler
4555                  * can access it.
4556                  */
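                     /* E.g. a 0x30 byte frame becomes (1 << 31) | (0x30 << 16): bit 31
                      * flags the omit_fp encoding, bits 16-30 hold the frame size. */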
4557                 g_assert (cfg->arch.stack_alloc_size < (1 << 16));
4558                 cfg->used_int_regs |= (1 << 31) | (cfg->arch.stack_alloc_size << 16);
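                /* e.g. a stack_alloc_size of 0x30 yields used_int_regs |= 0x80300000;
                 * the handler can recover the size as (used_int_regs >> 16) & 0xffff and
                 * test bit 31 to detect a frame without a frame pointer. */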
4559         }
4560 }
4561
4562 void
4563 mono_arch_emit_exceptions (MonoCompile *cfg)
4564 {
4565         MonoJumpInfo *patch_info;
4566         int nthrows, i;
4567         guint8 *code;
4568         MonoClass *exc_classes [16];
4569         guint8 *exc_throw_start [16], *exc_throw_end [16];
4570         guint32 code_size = 0;
4571
4572         /* Compute needed space */
4573         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4574                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4575                         code_size += 40;
4576                 if (patch_info->type == MONO_PATCH_INFO_R8)
4577                         code_size += 8 + 15; /* sizeof (double) + alignment */
4578                 if (patch_info->type == MONO_PATCH_INFO_R4)
4579                         code_size += 4 + 15; /* sizeof (float) + alignment */
4580         }
4581
4582         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4583                 cfg->code_size *= 2;
4584                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4585                 mono_jit_stats.code_reallocs++;
4586         }
4587
4588         code = cfg->native_code + cfg->code_len;
4589
4590         /* add code to raise exceptions */
4591         nthrows = 0;
4592         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4593                 switch (patch_info->type) {
4594                 case MONO_PATCH_INFO_EXC: {
4595                         MonoClass *exc_class;
4596                         guint8 *buf, *buf2;
4597                         guint32 throw_ip;
4598
4599                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
4600
4601                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4602                         g_assert (exc_class);
4603                         throw_ip = patch_info->ip.i;
4604
4605                         //x86_breakpoint (code);
4606                         /* Find a throw sequence for the same exception class */
4607                         for (i = 0; i < nthrows; ++i)
4608                                 if (exc_classes [i] == exc_class)
4609                                         break;
4610                         if (i < nthrows) {
4611                                 amd64_mov_reg_imm (code, AMD64_ARG_REG2, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4612                                 x86_jump_code (code, exc_throw_start [i]);
4613                                 patch_info->type = MONO_PATCH_INFO_NONE;
4614                         }
4615                         else {
4616                                 buf = code;
4617                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG2, 0xf0f0f0f0, 4);
4618                                 buf2 = code;
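                                /* The 0xf0f0f0f0 immediate is only a placeholder: once the call
                                 * below has been emitted, the real offset is written back at buf
                                 * and any leftover bytes are nop-filled (see the loop further down). */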
4619
4620                                 if (nthrows < 16) {
4621                                         exc_classes [nthrows] = exc_class;
4622                                         exc_throw_start [nthrows] = code;
4623                                 }
4624                                 amd64_mov_reg_imm (code, AMD64_ARG_REG1, exc_class->type_token);
4625
4626                                 patch_info->type = MONO_PATCH_INFO_NONE;
4627
4628                                 code = emit_call_body (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_arch_throw_corlib_exception");
4629
4630                                 amd64_mov_reg_imm (buf, AMD64_ARG_REG2, (code - cfg->native_code) - throw_ip);
4631                                 while (buf < buf2)
4632                                         x86_nop (buf);
4633
4634                                 if (nthrows < 16) {
4635                                         exc_throw_end [nthrows] = code;
4636                                         nthrows ++;
4637                                 }
4638                         }
4639                         break;
4640                 }
4641                 default:
4642                         /* do nothing */
4643                         break;
4644                 }
4645         }
4646
4647         /* Handle relocations with RIP relative addressing */
4648         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4649                 gboolean remove = FALSE;
4650
4651                 switch (patch_info->type) {
4652                 case MONO_PATCH_INFO_R8:
4653                 case MONO_PATCH_INFO_R4: {
4654                         guint8 *pos;
4655
4656                         /* The SSE opcodes require a 16 byte alignment */
4657                         code = (guint8*)ALIGN_TO (code, 16);
4658
4659                         pos = cfg->native_code + patch_info->ip.i;
4660
4661                         if (IS_REX (pos [1]))
4662                                 *(guint32*)(pos + 5) = (guint8*)code - pos - 9;
4663                         else
4664                                 *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
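                        /* The patched instruction is an SSE load of the form
                         * movss/movsd %xmm, disp32(%rip); the displacement is relative to the
                         * end of the instruction, which is 8 bytes long (9 with a REX prefix),
                         * hence the pos + 8 / pos + 9 adjustments above. */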
4665
4666                         if (patch_info->type == MONO_PATCH_INFO_R8) {
4667                                 *(double*)code = *(double*)patch_info->data.target;
4668                                 code += sizeof (double);
4669                         } else {
4670                                 *(float*)code = *(float*)patch_info->data.target;
4671                                 code += sizeof (float);
4672                         }
4673
4674                         remove = TRUE;
4675                         break;
4676                 }
4677                 default:
4678                         break;
4679                 }
4680
4681                 if (remove) {
4682                         if (patch_info == cfg->patch_info)
4683                                 cfg->patch_info = patch_info->next;
4684                         else {
4685                                 MonoJumpInfo *tmp;
4686
4687                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
4688                                         ;
4689                                 tmp->next = patch_info->next;
4690                         }
4691                 }
4692         }
4693
4694         cfg->code_len = code - cfg->native_code;
4695
4696         g_assert (cfg->code_len < cfg->code_size);
4697
4698 }
4699
4700 void*
4701 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4702 {
4703         guchar *code = p;
4704         CallInfo *cinfo = NULL;
4705         MonoMethodSignature *sig;
4706         MonoInst *inst;
4707         int i, n, stack_area = 0;
4708
4709         /* Keep this in sync with mono_arch_get_argument_info */
4710
4711         if (enable_arguments) {
4712                 /* Allocate a new area on the stack and save arguments there */
4713                 sig = mono_method_signature (cfg->method);
4714
4715                 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
4716
4717                 n = sig->param_count + sig->hasthis;
4718
4719                 stack_area = ALIGN_TO (n * 8, 16);
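                /* Keep the area a multiple of 16 bytes so %rsp stays properly aligned
                 * for the call below. */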
4720
4721                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
4722
4723                 for (i = 0; i < n; ++i) {
4724                         inst = cfg->args [i];
4725
4726                         if (inst->opcode == OP_REGVAR)
4727                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
4728                         else {
4729                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
4730                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
4731                         }
4732                 }
4733         }
4734
4735         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4736         amd64_set_reg_template (code, AMD64_ARG_REG1);
4737         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RSP, 8);
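        /* func is invoked as func (method, args): ARG_REG1 is patched with the
         * MonoMethod pointer via the template above, ARG_REG2 points at the
         * argument save area on the stack. */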
4738         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
4739
4740         if (enable_arguments)
4741                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
4742
4743         return code;
4744 }
4745
4746 enum {
4747         SAVE_NONE,
4748         SAVE_STRUCT,
4749         SAVE_EAX,
4750         SAVE_EAX_EDX,
4751         SAVE_XMM
4752 };
4753
4754 void*
4755 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4756 {
4757         guchar *code = p;
4758         int save_mode = SAVE_NONE;
4759         MonoMethod *method = cfg->method;
4760         int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;
4761         
4762         switch (rtype) {
4763         case MONO_TYPE_VOID:
4764                 /* special case string .ctor icall */
4765                 if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
4766                         save_mode = SAVE_EAX;
4767                 else
4768                         save_mode = SAVE_NONE;
4769                 break;
4770         case MONO_TYPE_I8:
4771         case MONO_TYPE_U8:
4772                 save_mode = SAVE_EAX;
4773                 break;
4774         case MONO_TYPE_R4:
4775         case MONO_TYPE_R8:
4776                 save_mode = SAVE_XMM;
4777                 break;
4778         case MONO_TYPE_GENERICINST:
4779                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
4780                         save_mode = SAVE_EAX;
4781                         break;
4782                 }
4783                 /* Fall through */
4784         case MONO_TYPE_VALUETYPE:
4785                 save_mode = SAVE_STRUCT;
4786                 break;
4787         default:
4788                 save_mode = SAVE_EAX;
4789                 break;
4790         }
4791
4792         /* Save the result and copy it into the proper argument register */
4793         switch (save_mode) {
4794         case SAVE_EAX:
4795                 amd64_push_reg (code, AMD64_RAX);
4796                 /* Align stack */
4797                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4798                 if (enable_arguments)
4799                         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RAX, 8);
4800                 break;
4801         case SAVE_STRUCT:
4802                 /* FIXME: */
4803                 if (enable_arguments)
4804                         amd64_mov_reg_imm (code, AMD64_ARG_REG2, 0);
4805                 break;
4806         case SAVE_XMM:
4807                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4808                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
4809                 /* Align stack */
4810                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4811                 /* 
4812                  * The result is already in the proper argument register so no copying
4813                  * needed.
4814                  */
4815                 break;
4816         case SAVE_NONE:
4817                 break;
4818         default:
4819                 g_assert_not_reached ();
4820         }
4821
4822         /* Set %al to the number of SSE registers used, as required for varargs calls */
4823         if (save_mode == SAVE_XMM)
4824                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
4825         else
4826                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
4827
4828         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
4829         amd64_set_reg_template (code, AMD64_ARG_REG1);
4830         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
4831
4832         /* Restore result */
4833         switch (save_mode) {
4834         case SAVE_EAX:
4835                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4836                 amd64_pop_reg (code, AMD64_RAX);
4837                 break;
4838         case SAVE_STRUCT:
4839                 /* FIXME: */
4840                 break;
4841         case SAVE_XMM:
4842                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4843                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
4844                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4845                 break;
4846         case SAVE_NONE:
4847                 break;
4848         default:
4849                 g_assert_not_reached ();
4850         }
4851
4852         return code;
4853 }
4854
4855 void
4856 mono_arch_flush_icache (guint8 *code, gint size)
4857 {
4858         /* Not needed */
4859 }
4860
4861 void
4862 mono_arch_flush_register_windows (void)
4863 {
4864 }
4865
4866 gboolean 
4867 mono_arch_is_inst_imm (gint64 imm)
4868 {
4869         return amd64_is_imm32 (imm);
4870 }
4871
4872 /*
4873  * Determine whether the trap whose info is in SIGINFO is caused by
4874  * integer division overflow.
4875  */
4876 gboolean
4877 mono_arch_is_int_overflow (void *sigctx, void *info)
4878 {
4879         MonoContext ctx;
4880         guint8* rip;
4881         int reg;
4882         gint64 value;
4883
4884         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
4885
4886         rip = (guint8*)ctx.rip;
4887
4888         if (IS_REX (rip [0])) {
4889                 reg = amd64_rex_b (rip [0]);
4890                 rip ++;
4891         }
4892         else
4893                 reg = 0;
4894
4895         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
4896                 /* idiv REG */
4897                 reg += x86_modrm_rm (rip [1]);
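                /* e.g. 48 f7 f8 is idiv %rax: REX.W prefix, opcode 0xf7, ModRM with
                 * mod=3, reg=7, rm=0 */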
4898
4899                 switch (reg) {
4900                 case AMD64_RAX:
4901                         value = ctx.rax;
4902                         break;
4903                 case AMD64_RBX:
4904                         value = ctx.rbx;
4905                         break;
4906                 case AMD64_RCX:
4907                         value = ctx.rcx;
4908                         break;
4909                 case AMD64_RDX:
4910                         value = ctx.rdx;
4911                         break;
4912                 case AMD64_RBP:
4913                         value = ctx.rbp;
4914                         break;
4915                 case AMD64_RSP:
4916                         value = ctx.rsp;
4917                         break;
4918                 case AMD64_RSI:
4919                         value = ctx.rsi;
4920                         break;
4921                 case AMD64_RDI:
4922                         value = ctx.rdi;
4923                         break;
4924                 case AMD64_R12:
4925                         value = ctx.r12;
4926                         break;
4927                 case AMD64_R13:
4928                         value = ctx.r13;
4929                         break;
4930                 case AMD64_R14:
4931                         value = ctx.r14;
4932                         break;
4933                 case AMD64_R15:
4934                         value = ctx.r15;
4935                         break;
4936                 default:
4937                         g_assert_not_reached ();
4938                         reg = -1;
4939                 }                       
4940
4941                 if (value == -1)
4942                         return TRUE;
4943         }
4944
4945         return FALSE;
4946 }
4947
4948 guint32
4949 mono_arch_get_patch_offset (guint8 *code)
4950 {
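        /* On amd64 the data to be patched starts 3 bytes into the instruction,
         * presumably after the REX prefix, the opcode and the ModRM byte. */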
4951         return 3;
4952 }
4953
4954 /**
4955  * mono_breakpoint_clean_code:
4956  *
4957  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
4958  * breakpoints in the original code, they are removed in the copy.
4959  *
4960  * Returns TRUE if no sw breakpoint was present.
4961  */
4962 gboolean
4963 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
4964 {
4965         int i;
4966         gboolean can_write = TRUE;
4967         /*
4968          * If method_start is non-NULL we need to perform bounds checks, since accessing
4969          * memory at code - offset could reach before the start of the method and end up
4970          * in a different page that is not mapped, or read incorrect data. In that case
4971          * we zero-fill the out-of-range bytes instead.
4972          */
4973         if (!method_start || code - offset >= method_start) {
4974                 memcpy (buf, code - offset, size);
4975         } else {
4976                 int diff = code - method_start;
4977                 memset (buf, 0, size);
4978                 memcpy (buf + offset - diff, method_start, diff + size - offset);
4979         }
4980         code -= offset;
4981         for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
4982                 int idx = mono_breakpoint_info_index [i];
4983                 guint8 *ptr;
4984                 if (idx < 1)
4985                         continue;
4986                 ptr = mono_breakpoint_info [idx].address;
4987                 if (ptr >= code && ptr < code + size) {
4988                         guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
4989                         can_write = FALSE;
4990                         /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
4991                         buf [ptr - code] = saved_byte;
4992                 }
4993         }
4994         return can_write;
4995 }
4996
4997 gpointer
4998 mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
4999 {
5000         guint8 buf [10];
5001         guint32 reg;
5002         gint32 disp;
5003         guint8 rex = 0;
5004
5005         mono_breakpoint_clean_code (NULL, code, 9, buf, sizeof (buf));
5006         code = buf + 9;
5007
5008         *displacement = 0;
5009
5010         /* go to the start of the call instruction
5011          *
5012          * address_byte = (m << 6) | (o << 3) | reg
5013          * call opcode: 0xff address_byte displacement
5014          * 0xff m=1,o=2 imm8
5015          * 0xff m=2,o=2 imm32
5016          */
5017         code -= 7;
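        /* buf holds the bytes leading up to the return address; after the
         * adjustment above, code points 7 bytes before it, so code [0] ... code [6]
         * cover the call sequence and negative indices reach further back. */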
5018
5019         /* 
5020          * A given byte sequence can match more than one case here, so we have to be
5021          * really careful about the ordering of the cases. Longer sequences
5022          * come first.
5023          */
5024 #ifdef MONO_ARCH_HAVE_IMT
5025         if ((code [-2] == 0x41) && (code [-1] == 0xbb) && (code [4] == 0xff) && (x86_modrm_mod (code [5]) == 1) && (x86_modrm_reg (code [5]) == 2) && ((signed char)code [6] < 0)) {
5026                 /* IMT-based interface calls: with MONO_ARCH_IMT_REG == r11
5027                  * 41 bb 14 f8 28 08       mov    $0x828f814,%r11d
5028                  * ff 50 fc                call   *0xfffffffc(%rax)
5029                  */
5030                 reg = amd64_modrm_rm (code [5]);
5031                 disp = (signed char)code [6];
5032                 /* R10 is clobbered by the IMT thunk code */
5033                 g_assert (reg != AMD64_R10);
5034         }
5035 #else
5036         if (0) {
5037         }
5038 #endif
5039         else if ((code [-1] == 0x8b) && (amd64_modrm_mod (code [0]) == 0x2) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
5040                 /*
5041                  * This is an interface call:
5042                  * 48 8b 80 f0 e8 ff ff   mov    0xffffffffffffe8f0(%rax),%rax
5043                  * ff 10                  callq  *(%rax)
5044                  */
5045                 if (IS_REX (code [4]))
5046                         rex = code [4];
5047                 reg = amd64_modrm_rm (code [6]);
5048                 disp = 0;
5049                 /* R10 is clobbered by the IMT thunk code */
5050                 g_assert (reg != AMD64_R10);
5051         } else if ((code [0] == 0x41) && (code [1] == 0xff) && (code [2] == 0x15)) {
5052                 /* call OFFSET(%rip) */
5053                 disp = *(guint32*)(code + 3);
5054                 return (gpointer*)(code + disp + 7);
5055         }
5056         else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) {
5057                 /* call *[reg+disp32] */
5058                 if (IS_REX (code [0]))
5059                         rex = code [0];
5060                 reg = amd64_modrm_rm (code [2]);
5061                 disp = *(gint32*)(code + 3);
5062                 /* R10 is clobbered by the IMT thunk code */
5063                 g_assert (reg != AMD64_R10);
5064         }
5065         else if (code [2] == 0xe8) {
5066                 /* call <ADDR> */
5067                 return NULL;
5068         }
5069         else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) {
5070                 /* call *%reg */
5071                 return NULL;
5072         }
5073         else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) {
5074                 /* call *[reg+disp8] */
5075                 if (IS_REX (code [3]))
5076                         rex = code [3];
5077                 reg = amd64_modrm_rm (code [5]);
5078                 disp = *(gint8*)(code + 6);
5079                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
5080         }
5081         else if ((code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
5082                 /*
5083                  * This is an interface call; we should check that the cases above cannot catch it earlier.
5084                  * 8b 40 30   mov    0x30(%eax),%eax
5085                  * ff 10      call   *(%eax)
5086                  */
5087                 if (IS_REX (code [4]))
5088                         rex = code [4];
5089                 reg = amd64_modrm_rm (code [6]);
5090                 disp = 0;
5091         }
5092         else
5093                 g_assert_not_reached ();
5094
5095         reg += amd64_rex_b (rex);
5096
5097         /* R11 is clobbered by the trampoline code */
5098         g_assert (reg != AMD64_R11);
5099
5100         *displacement = disp;
5101         return regs [reg];
5102 }
5103
5104 gpointer*
5105 mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
5106 {
5107         gpointer vt;
5108         int displacement;
5109         vt = mono_arch_get_vcall_slot (code, regs, &displacement);
5110         if (!vt)
5111                 return NULL;
5112         return (gpointer*)((char*)vt + displacement);
5113 }
5114
5115 int
5116 mono_arch_get_this_arg_reg (MonoMethodSignature *sig, MonoGenericSharingContext *gsctx)
5117 {
5118         int this_reg = AMD64_ARG_REG1;
5119
5120         if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5121                 CallInfo *cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5122                 
5123                 if (cinfo->ret.storage != ArgValuetypeInReg)
5124                         this_reg = AMD64_ARG_REG2;
5125                 g_free (cinfo);
5126         }
5127
5128         return this_reg;
5129 }
5130
5131 gpointer
5132 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
5133 {
5134         return (gpointer)regs [mono_arch_get_this_arg_reg (sig, NULL)];
5135 }
5136
5137 #define MAX_ARCH_DELEGATE_PARAMS 10
5138
5139 gpointer
5140 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
5141 {
5142         guint8 *code, *start;
5143         int i;
5144
5145         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
5146                 return NULL;
5147
5148         /* FIXME: Support more cases */
5149         if (MONO_TYPE_ISSTRUCT (sig->ret))
5150                 return NULL;
5151
5152         if (has_target) {
5153                 static guint8* cached = NULL;
5154                 mono_mini_arch_lock ();
5155                 if (cached) {
5156                         mono_mini_arch_unlock ();
5157                         return cached;
5158                 }
5159
5160                 start = code = mono_global_codeman_reserve (64);
5161
5162                 /* Replace the this argument with the target */
5163                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
5164                 amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, target), 8);
5165                 amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5166
5167                 g_assert ((code - start) < 64);
5168
5169                 cached = start;
5170                 mono_debug_add_delegate_trampoline (start, code - start);
5171                 mono_mini_arch_unlock ();
5172         } else {
5173                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
5174                 for (i = 0; i < sig->param_count; ++i)
5175                         if (!mono_is_regsize_var (sig->params [i]))
5176                                 return NULL;
5177                 if (sig->param_count > 4)
5178                         return NULL;
5179
5180                 mono_mini_arch_lock ();
5181                 code = cache [sig->param_count];
5182                 if (code) {
5183                         mono_mini_arch_unlock ();
5184                         return code;
5185                 }
5186
5187                 start = code = mono_global_codeman_reserve (64);
5188
5189                 if (sig->param_count == 0) {
5190                         amd64_jump_membase (code, AMD64_ARG_REG1, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5191                 } else {
5192                         /* We have to shift the arguments left */
5193                         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
5194                         for (i = 0; i < sig->param_count; ++i)
5195                                 amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
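                        /* e.g. with two parameters this does %rdi <- %rsi, then %rsi <- %rdx,
                         * dropping the delegate object from the first argument register
                         * (assuming param_regs lists the integer argument registers in
                         * calling convention order). */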
5196
5197                         amd64_jump_membase (code, AMD64_RAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5198                 }
5199                 g_assert ((code - start) < 64);
5200
5201                 cache [sig->param_count] = start;
5202                 
5203                 mono_debug_add_delegate_trampoline (start, code - start);
5204                 mono_mini_arch_unlock ();
5205         }
5206
5207         return start;
5208 }
5209
5210 /*
5211  * Support for fast access to the thread-local lmf structure using the GS
5212  * segment register on NPTL + kernel 2.6.x.
5213  */
5214
5215 static gboolean tls_offset_inited = FALSE;
5216
5217 void
5218 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
5219 {
5220         if (!tls_offset_inited) {
5221                 tls_offset_inited = TRUE;
5222 #ifdef MONO_XEN_OPT
5223                 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
5224 #endif
5225                 appdomain_tls_offset = mono_domain_get_tls_offset ();
5226                 lmf_tls_offset = mono_get_lmf_tls_offset ();
5227                 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
5228                 thread_tls_offset = mono_thread_get_tls_offset ();
5229         }               
5230 }
5231
5232 void
5233 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5234 {
5235 }
5236
5237 void
5238 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
5239 {
5240         MonoCallInst *call = (MonoCallInst*)inst;
5241         CallInfo * cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, inst->signature, FALSE);
5242
5243         if (vt_reg != -1) {
5244                 MonoInst *vtarg;
5245
5246                 if (cinfo->ret.storage == ArgValuetypeInReg) {
5247                         /*
5248                          * The valuetype is in RAX:RDX after the call and needs to be copied to
5249                          * the stack. Push the address here, so the call instruction can
5250                          * access it.
5251                          */
5252                         MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
5253                         vtarg->sreg1 = vt_reg;
5254                         mono_bblock_add_inst (cfg->cbb, vtarg);
5255
5256                         /* Align stack */
5257                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
5258                 }
5259                 else {
5260                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
5261                         vtarg->sreg1 = vt_reg;
5262                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
5263                         mono_bblock_add_inst (cfg->cbb, vtarg);
5264
5265                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
5266                 }
5267         }
5268
5269         /* add the this argument */
5270         if (this_reg != -1) {
5271                 MonoInst *this;
5272                 MONO_INST_NEW (cfg, this, OP_MOVE);
5273                 this->type = this_type;
5274                 this->sreg1 = this_reg;
5275                 this->dreg = mono_regstate_next_int (cfg->rs);
5276                 mono_bblock_add_inst (cfg->cbb, this);
5277
5278                 mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
5279         }
5280 }
5281
5282 #ifdef MONO_ARCH_HAVE_IMT
5283
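/*
 * Conservative upper bounds (in bytes) for the instructions emitted by the
 * IMT thunk builder below; they are used to precompute each chunk's size.
 */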
5284 #define CMP_SIZE (6 + 1)
5285 #define CMP_REG_REG_SIZE (4 + 1)
5286 #define BR_SMALL_SIZE 2
5287 #define BR_LARGE_SIZE 6
5288 #define MOV_REG_IMM_SIZE 10
5289 #define MOV_REG_IMM_32BIT_SIZE 6
5290 #define JUMP_REG_SIZE (2 + 1)
5291
5292 static int
5293 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
5294 {
5295         int i, distance = 0;
5296         for (i = start; i < target; ++i)
5297                 distance += imt_entries [i]->chunk_size;
5298         return distance;
5299 }
5300
5301 /*
5302  * LOCKING: called with the domain lock held
5303  */
5304 gpointer
5305 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
5306 {
5307         int i;
5308         int size = 0;
5309         guint8 *code, *start;
5310         gboolean vtable_is_32bit = ((gsize)(vtable) == (gsize)(int)(gsize)(vtable));
5311
5312         for (i = 0; i < count; ++i) {
5313                 MonoIMTCheckItem *item = imt_entries [i];
5314                 if (item->is_equals) {
5315                         if (item->check_target_idx) {
5316                                 if (!item->compare_done) {
5317                                         if (amd64_is_imm32 (item->method))
5318                                                 item->chunk_size += CMP_SIZE;
5319                                         else
5320                                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
5321                                 }
5322                                 if (vtable_is_32bit)
5323                                         item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
5324                                 else
5325                                         item->chunk_size += MOV_REG_IMM_SIZE;
5326                                 item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
5327                         } else {
5328                                 if (vtable_is_32bit)
5329                                         item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
5330                                 else
5331                                         item->chunk_size += MOV_REG_IMM_SIZE;
5332                                 item->chunk_size += JUMP_REG_SIZE;
5333                                 /* if the commented-out assert below is enabled, add:
5334                                  * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5335                                  */
5336                         }
5337                 } else {
5338                         if (amd64_is_imm32 (item->method))
5339                                 item->chunk_size += CMP_SIZE;
5340                         else
5341                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
5342                         item->chunk_size += BR_LARGE_SIZE;
5343                         imt_entries [item->check_target_idx]->compare_done = TRUE;
5344                 }
5345                 size += item->chunk_size;
5346         }
5347         code = mono_code_manager_reserve (domain->code_mp, size);
5348         start = code;
5349         for (i = 0; i < count; ++i) {
5350                 MonoIMTCheckItem *item = imt_entries [i];
5351                 item->code_target = code;
5352                 if (item->is_equals) {
5353                         if (item->check_target_idx) {
5354                                 if (!item->compare_done) {
5355                                         if (amd64_is_imm32 (item->method))
5356                                                 amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method);
5357                                         else {
5358                                                 amd64_mov_reg_imm (code, AMD64_R10, item->method);
5359                                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
5360                                         }
5361                                 }
5362                                 item->jmp_code = code;
5363                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
5364                                 /* See the comment below about R10 */
5365                                 amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->vtable_slot]));
5366                                 amd64_jump_membase (code, AMD64_R10, 0);
5367                         } else {
5368                                 /* enable the commented-out code below to assert on a wrong method */
5369 #if 0
5370                                 if (amd64_is_imm32 (item->method))
5371                                         amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method);
5372                                 else {
5373                                         amd64_mov_reg_imm (code, AMD64_R10, item->method);
5374                                         amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
5375                                 }
5376                                 item->jmp_code = code;
5377                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
5378                                 /* See the comment below about R10 */
5379                                 amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->vtable_slot]));
5380                                 amd64_jump_membase (code, AMD64_R10, 0);
5381                                 amd64_patch (item->jmp_code, code);
5382                                 amd64_breakpoint (code);
5383                                 item->jmp_code = NULL;
5384 #else
5385                                 /* We're using R10 here because R11
5386                                    needs to be preserved.  R10 needs
5387                                    to be preserved for calls which
5388                                    require a runtime generic context,
5389                                    but interface calls don't. */
5390                                 amd64_mov_reg_imm (code, AMD64_R10, & (vtable->vtable [item->vtable_slot]));
5391                                 amd64_jump_membase (code, AMD64_R10, 0);
5392 #endif
5393                         }
5394                 } else {
5395                         if (amd64_is_imm32 (item->method))
5396                                 amd64_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->method);
5397                         else {
5398                                 amd64_mov_reg_imm (code, AMD64_R10, item->method);
5399                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, AMD64_R10);
5400                         }
5401                         item->jmp_code = code;
5402                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5403                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5404                         else
5405                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5406                 }
5407                 g_assert (code - item->code_target <= item->chunk_size);
5408         }
5409         /* patch the branches to get to the target items */
5410         for (i = 0; i < count; ++i) {
5411                 MonoIMTCheckItem *item = imt_entries [i];
5412                 if (item->jmp_code) {
5413                         if (item->check_target_idx) {
5414                                 amd64_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5415                         }
5416                 }
5417         }
5418                 
5419         mono_stats.imt_thunks_size += code - start;
5420         g_assert (code - start <= size);
5421
5422         return start;
5423 }
5424
5425 MonoMethod*
5426 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
5427 {
5428         return regs [MONO_ARCH_IMT_REG];
5429 }
5430
5431 MonoObject*
5432 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
5433 {
5434         return regs [mono_arch_get_this_arg_reg (mono_method_signature (method), gsctx)];
5435 }
5436 #endif
5437
5438 MonoVTable*
5439 mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
5440 {
5441         return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
5442 }
5443
5444 MonoInst*
5445 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5446 {
5447         MonoInst *ins = NULL;
5448
5449         if (cmethod->klass == mono_defaults.math_class) {
5450                 if (strcmp (cmethod->name, "Sin") == 0) {
5451                         MONO_INST_NEW (cfg, ins, OP_SIN);
5452                         ins->inst_i0 = args [0];
5453                 } else if (strcmp (cmethod->name, "Cos") == 0) {
5454                         MONO_INST_NEW (cfg, ins, OP_COS);
5455                         ins->inst_i0 = args [0];
5456                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5457                         MONO_INST_NEW (cfg, ins, OP_SQRT);
5458                         ins->inst_i0 = args [0];
5459                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5460                         MONO_INST_NEW (cfg, ins, OP_ABS);
5461                         ins->inst_i0 = args [0];
5462                 }
5463
5464                 if (cfg->opt & MONO_OPT_CMOV) {
5465                         int opcode = 0;
5466
5467                         if (strcmp (cmethod->name, "Min") == 0) {
5468                                 if (fsig->params [0]->type == MONO_TYPE_I4)
5469                                         opcode = OP_IMIN;
5470                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
5471                                         opcode = OP_IMIN_UN;
5472                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
5473                                         opcode = OP_LMIN;
5474                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
5475                                         opcode = OP_LMIN_UN;
5476                         } else if (strcmp (cmethod->name, "Max") == 0) {
5477                                 if (fsig->params [0]->type == MONO_TYPE_I4)
5478                                         opcode = OP_IMAX;
5479                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
5480                                         opcode = OP_IMAX_UN;
5481                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
5482                                         opcode = OP_LMAX;
5483                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
5484                                         opcode = OP_LMAX_UN;
5485                         }               
5486
5487                         if (opcode) {
5488                                 MONO_INST_NEW (cfg, ins, opcode);
5489                                 ins->inst_i0 = args [0];
5490                                 ins->inst_i1 = args [1];
5491                         }
5492                 }
5493
5494 #if 0
5495                 /* OP_FREM is not IEEE compatible */
5496                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5497                         MONO_INST_NEW (cfg, ins, OP_FREM);
5498                         ins->inst_i0 = args [0];
5499                         ins->inst_i1 = args [1];
5500                 }
5501 #endif
5502         }
5503
5504         return ins;
5505 }
5506
5507 gboolean
5508 mono_arch_print_tree (MonoInst *tree, int arity)
5509 {
5510         return 0;
5511 }
5512
5513 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5514 {
5515         MonoInst* ins;
5516         
5517         if (appdomain_tls_offset == -1)
5518                 return NULL;
5519         
5520         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5521         ins->inst_offset = appdomain_tls_offset;
5522         return ins;
5523 }
5524
5525 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5526 {
5527         MonoInst* ins;
5528         
5529         if (thread_tls_offset == -1)
5530                 return NULL;
5531         
5532         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5533         ins->inst_offset = thread_tls_offset;
5534         return ins;
5535 }
5536
5537 #define _CTX_REG(ctx,fld,i) ((gpointer)((&(ctx)->fld)[i]))
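/* Indexing off the rax/r12 fields like this assumes the general purpose
 * registers are laid out contiguously and in register-number order inside
 * MonoContext. */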
5538
5539 gpointer
5540 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
5541 {
5542         switch (reg) {
5543         case AMD64_RCX: return (gpointer)ctx->rcx;
5544         case AMD64_RDX: return (gpointer)ctx->rdx;
5545         case AMD64_RBX: return (gpointer)ctx->rbx;
5546         case AMD64_RBP: return (gpointer)ctx->rbp;
5547         case AMD64_RSP: return (gpointer)ctx->rsp;
5548         default:
5549                 if (reg < 8)
5550                         return _CTX_REG (ctx, rax, reg);
5551                 else if (reg >= 12)
5552                         return _CTX_REG (ctx, r12, reg - 12);
5553                 else
5554                         g_assert_not_reached ();
5555         }
5556 }