/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-amd64.h"
#include "inssel.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

static gboolean use_sse2 = !MONO_ARCH_USE_FPSTACK;

const char * const amd64_desc [OP_LAST];
static const char*const * ins_spec = amd64_desc;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

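/*
 * Illustrative examples (not in the original file): ALIGN_TO rounds a value
 * up to a power-of-two boundary, IS_IMM32 tests whether the upper 32 bits
 * of a value are clear:
 *
 *   ALIGN_TO (13, 8)           == 16   -- round up to an 8 byte boundary
 *   IS_IMM32 (0xffffffffULL)   != 0    -- fits in a zero-extended imm32
 *   IS_IMM32 (0x100000000ULL)  == 0    -- needs more than 32 bits
 */
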
#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * UNORDERED        1  1  1
 */

#define NOT_IMPLEMENTED g_assert_not_reached ()

const char*
mono_arch_regname (int reg) {
	switch (reg) {
	case AMD64_RAX: return "%rax";
	case AMD64_RBX: return "%rbx";
	case AMD64_RCX: return "%rcx";
	case AMD64_RDX: return "%rdx";
	case AMD64_RSP: return "%rsp";
	case AMD64_RBP: return "%rbp";
	case AMD64_RDI: return "%rdi";
	case AMD64_RSI: return "%rsi";
	case AMD64_R8: return "%r8";
	case AMD64_R9: return "%r9";
	case AMD64_R10: return "%r10";
	case AMD64_R11: return "%r11";
	case AMD64_R12: return "%r12";
	case AMD64_R13: return "%r13";
	case AMD64_R14: return "%r14";
	case AMD64_R15: return "%r15";
	}
	return "unknown";
}

static const char * xmmregs [] = {
	"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8",
	"xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

const char*
mono_arch_fregname (int reg)
{
	if (reg < AMD64_XMM_NREG)
		return xmmregs [reg];
	else
		return "unknown";
}

static inline void 
amd64_patch (unsigned char* code, gpointer target)
{
	/* Skip REX */
	if ((code [0] >= 0x40) && (code [0] <= 0x4f))
		code += 1;

	if ((code [0] & 0xf8) == 0xb8) {
		/* amd64_set_reg_template */
		*(guint64*)(code + 1) = (guint64)target;
	}
	else if (code [0] == 0x8b) {
		/* mov 0(%rip), %dreg */
		*(guint32*)(code + 2) = (guint32)(guint64)target - 7;
	}
	else if ((code [0] == 0xff) && (code [1] == 0x15)) {
		/* call *<OFFSET>(%rip) */
		*(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
	}
	else if ((code [0] == 0xe8)) {
		/* call <DISP> */
		gint64 disp = (guint8*)target - (guint8*)code;
		g_assert (amd64_is_imm32 (disp));
		x86_patch (code, (unsigned char*)target);
	}
	else
		x86_patch (code, (unsigned char*)target);
}
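
/*
 * Example: code produced by amd64_set_reg_template () is REX + (0xb8 | reg)
 * + imm64, e.g. for %rax:
 *
 *   48 b8 xx xx xx xx xx xx xx xx      movabs $imm64, %rax
 *
 * amd64_patch () skips the REX byte and rewrites the 8 byte immediate in
 * place; RIP-relative movs and calls get a 4 byte slot instead, and plain
 * rel32 calls/jumps are delegated to x86_patch ().
 */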

typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
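
/*
 * CallInfo ends with a one-element "args" array used as a flexible array,
 * so get_call_info () below over-allocates it for n arguments:
 *
 *   cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
 */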

#define DEBUG(a) if (cfg->verbose_level > 1) a

#define NEW_ICONST(cfg,dest,val) do {	\
		(dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));	\
		(dest)->opcode = OP_ICONST;	\
		(dest)->inst_c0 = (val);	\
		(dest)->type = STACK_I4;	\
	} while (0)

#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}
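
/*
 * Example: with the System V ordering in param_regs above, the first six
 * integer arguments go to %rdi, %rsi, %rdx, %rcx, %r8 and %r9 (ArgInIReg);
 * from the seventh on, each argument is classified ArgOnStack and advances
 * stack_size by sizeof (gpointer) == 8.
 */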

#define FLOAT_PARAM_REGS 8

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
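
/*
 * Example: the first eight floating point arguments become
 * ArgInFloatSSEReg/ArgInDoubleSSEReg in %xmm0-%xmm7 (ainfo->reg holds the
 * XMM register number); any further ones spill to the stack, 8 bytes each,
 * just like integers.
 */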

typedef enum ArgumentClass {
	ARG_CLASS_NO_CLASS,
	ARG_CLASS_MEMORY,
	ARG_CLASS_INTEGER,
	ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
	ArgumentClass class2 = ARG_CLASS_NO_CLASS;
	MonoType *ptype;

	ptype = mono_type_get_underlying_type (type);
	switch (ptype->type) {
	case MONO_TYPE_BOOLEAN:
	case MONO_TYPE_CHAR:
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_STRING:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		class2 = ARG_CLASS_INTEGER;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		class2 = ARG_CLASS_SSE;
		break;

	case MONO_TYPE_TYPEDBYREF:
		g_assert_not_reached ();

	case MONO_TYPE_VALUETYPE: {
		MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
		int i;

		for (i = 0; i < info->num_fields; ++i) {
			class2 = class1;
			class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
		}
		break;
	}
	default:
		g_assert_not_reached ();
	}

	/* Merge */
	if (class1 == class2)
		;
	else if (class1 == ARG_CLASS_NO_CLASS)
		class1 = class2;
	else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
		class1 = ARG_CLASS_MEMORY;
	else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
		class1 = ARG_CLASS_INTEGER;
	else
		class1 = ARG_CLASS_SSE;

	return class1;
}
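
/*
 * Example of the merge rules (section 3.2.3 of the x86-64 ABI): for
 * struct { float f; int i; } the first quad merges SSE with INTEGER,
 * and since INTEGER wins over SSE the quad goes to an integer register;
 * a MEMORY class on either side always forces MEMORY.
 */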

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size, quad, nquads, i;
	ArgumentClass args [2];
	MonoMarshalType *info;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	if (sig->pinvoke)
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else
		size = mono_type_stack_size (&klass->byval_arg, NULL);

	if (!sig->pinvoke || (size == 0) || (size > 16)) {
		/* Always pass in memory */
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (size, 8);
		ainfo->storage = ArgOnStack;

		return;
	}

	/* FIXME: Handle structs smaller than 8 bytes */
	//if ((size % 8) != 0)
	//	NOT_IMPLEMENTED;

	if (size > 8)
		nquads = 2;
	else
		nquads = 1;

	/*
	 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
	 * The X87 and SSEUP stuff is left out since there are no such types in
	 * the CLR.
	 */
	info = mono_marshal_load_type_info (klass);
	g_assert (info);
	if (info->native_size > 16) {
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (info->native_size, 8);
		ainfo->storage = ArgOnStack;

		return;
	}

	for (quad = 0; quad < nquads; ++quad) {
		int size, align;
		ArgumentClass class1;

		class1 = ARG_CLASS_NO_CLASS;
		for (i = 0; i < info->num_fields; ++i) {
			size = mono_marshal_type_size (info->fields [i].field->type,
						       info->fields [i].mspec,
						       &align, TRUE, klass->unicode);
			if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
				/* Unaligned field */
				NOT_IMPLEMENTED;
			}

			/* Skip fields in other quad */
			if ((quad == 0) && (info->fields [i].offset >= 8))
				continue;
			if ((quad == 1) && (info->fields [i].offset < 8))
				continue;

			class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
		}
		g_assert (class1 != ARG_CLASS_NO_CLASS);
		args [quad] = class1;
	}

	/* Post merger cleanup */
	if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
		args [0] = args [1] = ARG_CLASS_MEMORY;

	/* Allocate registers */
	{
		int orig_gr = *gr;
		int orig_fr = *fr;

		ainfo->storage = ArgValuetypeInReg;
		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
		for (quad = 0; quad < nquads; ++quad) {
			switch (args [quad]) {
			case ARG_CLASS_INTEGER:
				if (*gr >= PARAM_REGS)
					args [quad] = ARG_CLASS_MEMORY;
				else {
					ainfo->pair_storage [quad] = ArgInIReg;
					if (is_return)
						ainfo->pair_regs [quad] = return_regs [*gr];
					else
						ainfo->pair_regs [quad] = param_regs [*gr];
					(*gr) ++;
				}
				break;
			case ARG_CLASS_SSE:
				if (*fr >= FLOAT_PARAM_REGS)
					args [quad] = ARG_CLASS_MEMORY;
				else {
					ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
					ainfo->pair_regs [quad] = *fr;
					(*fr) ++;
				}
				break;
			case ARG_CLASS_MEMORY:
				break;
			default:
				g_assert_not_reached ();
			}
		}

		if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
			/* Revert possible register assignments */
			*gr = orig_gr;
			*fr = orig_fr;

			ainfo->offset = *stack_size;
			*stack_size += ALIGN_TO (info->native_size, 8);
			ainfo->storage = ArgOnStack;
		}
	}
}
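
/*
 * Worked example: for struct { double d; long l; } (16 bytes, pinvoke),
 * nquads == 2, quad 0 classifies as ARG_CLASS_SSE and quad 1 as
 * ARG_CLASS_INTEGER, so the value is passed as ArgValuetypeInReg with
 * pair_storage == { ArgInDoubleSSEReg, ArgInIReg }.  If either register
 * file runs out, both assignments are reverted and the whole struct is
 * passed on the stack.
 */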

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = AMD64_RAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = AMD64_RAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgInFloatSSEReg;
			cinfo->ret.reg = AMD64_XMM0;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgInDoubleSSEReg;
			cinfo->ret.reg = AMD64_XMM0;
			break;
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			break;
		case MONO_TYPE_VOID:
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (stack_size & 0x8) {
		/* The AMD64 ABI requires each stack frame to be 16 byte aligned */
		cinfo->need_stack_align = TRUE;
		stack_size += 8;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
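
/*
 * Example: for a static managed call "double f (int a, double b)",
 * get_call_info () yields ret = ArgInDoubleSSEReg in %xmm0, a = ArgInIReg
 * in %rdi and b = ArgInDoubleSSEReg in %xmm0; stack_usage stays 0, so
 * need_stack_align remains FALSE.
 */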

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k;
	CallInfo *cinfo = get_call_info (csig, FALSE);
	guint32 args_size = cinfo->stack_usage;

	/* The arguments are saved to a stack area in mono_arch_instrument_prolog */
	if (csig->hasthis) {
		arg_info [0].offset = 0;
	}

	for (k = 0; k < param_count; k++) {
		arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
		/* FIXME: */
		arg_info [k + 1].size = 0;
	}

	g_free (cinfo);

	return args_size;
}

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	return 0;
}
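
/*
 * This stub always fails, so mono_arch_cpu_optimizazions () below never
 * detects CMOV/FCMOV.  A real implementation would execute the CPUID
 * instruction, roughly (hypothetical sketch, not part of the original
 * file):
 *
 *   __asm__ __volatile__ ("cpuid"
 *           : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
 *           : "a" (id));
 *   return 1;
 */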

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	/* FIXME: AMD64 */

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
	}
	return opts;
}

gboolean
mono_amd64_is_sse2 (void)
{
	return use_sse2;
}

static gboolean
is_regsize_var (MonoType *t) {
	if (t->byref)
		return TRUE;
	t = mono_type_get_underlying_type (t);
	switch (t->type) {
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
		return TRUE;
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_STRING:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
		return TRUE;
	case MONO_TYPE_VALUETYPE:
		return FALSE;
	}
	return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way
		 * to sign-extend 8 bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* We use the callee saved registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
	regs = g_list_prepend (regs, (gpointer)AMD64_R12);
	regs = g_list_prepend (regs, (gpointer)AMD64_R13);
	regs = g_list_prepend (regs, (gpointer)AMD64_R14);
	regs = g_list_prepend (regs, (gpointer)AMD64_R15);

	return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		/* subtract 1 for the invisible store in the prolog */
		return (ins->opcode == OP_ARG) ? 0 : 1;
	else
		/* push+pop */
		return (ins->opcode == OP_ARG) ? 1 : 2;
}
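
/*
 * Example: when the method saves an LMF, the callee saved registers are
 * stored anyway, so allocating an OP_ARG variable costs nothing and a
 * local costs one store; without the LMF, the push+pop pair in the
 * prolog/epilog adds one more memory reference to each case.
 */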

void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	int i, offset;
	guint32 locals_stack_size, locals_stack_align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (m->method);

	sig = mono_method_signature (m->method);

	cinfo = get_call_info (sig, FALSE);

	/*
	 * We use the ABI calling conventions for managed code as well.
	 * Exception: valuetypes are never passed or returned in registers.
	 */

	/* Locals are allocated backwards from %fp */
	m->frame_reg = AMD64_RBP;
	offset = 0;

	/* Reserve space for callee saved registers */
	for (i = 0; i < AMD64_NREG; ++i)
		if (AMD64_IS_CALLEE_SAVED_REG (i) && (m->used_int_regs & (1 << i))) {
			offset += sizeof (gpointer);
		}

	if (m->method->save_lmf) {
		/* Reserve stack space for saving LMF + argument regs */
		offset += sizeof (MonoLMF);
		if (lmf_tls_offset == -1)
			/* Need to save argument regs too */
			offset += (AMD64_NREG * 8) + (8 * 8);
		m->arch.lmf_offset = offset;
	}

	if (sig->ret->type != MONO_TYPE_VOID) {
		switch (cinfo->ret.storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
				/* The register is volatile */
				m->ret->opcode = OP_REGOFFSET;
				m->ret->inst_basereg = AMD64_RBP;
				offset += 8;
				m->ret->inst_offset = - offset;
			}
			else {
				m->ret->opcode = OP_REGVAR;
				m->ret->inst_c0 = cinfo->ret.reg;
			}
			break;
		case ArgValuetypeInReg:
			/* Allocate a local to hold the result, the epilog will copy it to the correct place */
			offset += 16;
			m->ret->opcode = OP_REGOFFSET;
			m->ret->inst_basereg = AMD64_RBP;
			m->ret->inst_offset = - offset;
			break;
		default:
			g_assert_not_reached ();
		}
		m->ret->dreg = m->ret->inst_c0;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = m->locals_start; i < m->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = m->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = AMD64_RBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		m->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = m->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			ArgInfo *ainfo = &cinfo->args [i];
			gboolean inreg = TRUE;
			MonoType *arg_type;

			if (sig->hasthis && (i == 0))
				arg_type = &mono_defaults.object_class->byval_arg;
			else
				arg_type = sig->params [i - sig->hasthis];

			/* FIXME: Allocate volatile arguments to registers */
			if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
				inreg = FALSE;

			/*
			 * Under AMD64, all registers used to pass arguments to functions
			 * are volatile across calls.
			 * FIXME: Optimize this.
			 */
			if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg))
				inreg = FALSE;

			inst->opcode = OP_REGOFFSET;

			switch (ainfo->storage) {
			case ArgInIReg:
			case ArgInFloatSSEReg:
			case ArgInDoubleSSEReg:
				inst->opcode = OP_REGVAR;
				inst->dreg = ainfo->reg;
				break;
			case ArgOnStack:
				inst->opcode = OP_REGOFFSET;
				inst->inst_basereg = AMD64_RBP;
				inst->inst_offset = ainfo->offset + ARGS_OFFSET;
				break;
			case ArgValuetypeInReg:
				break;
			default:
				NOT_IMPLEMENTED;
			}

			if (!inreg && (ainfo->storage != ArgOnStack)) {
				inst->opcode = OP_REGOFFSET;
				inst->inst_basereg = AMD64_RBP;
				/* These arguments are saved to the stack in the prolog */
				if (ainfo->storage == ArgValuetypeInReg)
					offset += 2 * sizeof (gpointer);
				else
					offset += sizeof (gpointer);
				inst->inst_offset = - offset;
			}
		}
	}

	m->stack_offset = offset;

	g_free (cinfo);
}
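
/*
 * Resulting frame layout (sketch; assumes the usual push %rbp; mov %rsp,
 * %rbp prolog emitted elsewhere in this backend):
 *
 *   %rbp + 16 ...  incoming stack arguments (ARGS_OFFSET)
 *   %rbp +  8      return address
 *   %rbp +  0      saved %rbp
 *   %rbp -  N      callee saved registers, optional LMF (+ argument regs),
 *                  vtype return buffer, locals, then register arguments
 *                  spilled in the prolog; the total is m->stack_offset.
 */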

void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;

	g_free (cinfo);
}

static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage storage, int reg, MonoInst *tree)
{
	switch (storage) {
	case ArgInIReg:
		arg->opcode = OP_OUTARG_REG;
		arg->inst_left = tree;
		arg->inst_right = (MonoInst*)call;
		arg->unused = reg;
		call->used_iregs |= 1 << reg;
		break;
	case ArgInFloatSSEReg:
		arg->opcode = OP_AMD64_OUTARG_XMMREG_R4;
		arg->inst_left = tree;
		arg->inst_right = (MonoInst*)call;
		arg->unused = reg;
		call->used_fregs |= 1 << reg;
		break;
	case ArgInDoubleSSEReg:
		arg->opcode = OP_AMD64_OUTARG_XMMREG_R8;
		arg->inst_left = tree;
		arg->inst_right = (MonoInst*)call;
		arg->unused = reg;
		call->used_fregs |= 1 << reg;
		break;
	default:
		g_assert_not_reached ();
	}
}

/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode,
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

static int
arg_storage_to_ldind (ArgStorage storage)
{
	switch (storage) {
	case ArgInIReg:
		return CEE_LDIND_I;
	case ArgInDoubleSSEReg:
		return CEE_LDIND_R8;
	case ArgInFloatSSEReg:
		return CEE_LDIND_R4;
	default:
		g_assert_not_reached ();
	}

	return -1;
}

/*
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n, stack_size;
	CallInfo *cinfo;
	ArgInfo *ainfo;

	stack_size = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (sig, sig->pinvoke);

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			MonoMethodSignature *tmp_sig;

			/* Emit the signature cookie just before the implicit arguments */
			MonoInst *sig_arg;
			/* FIXME: Add support for signature tokens to AOT */
			cfg->disable_aot = TRUE;

			g_assert (cinfo->sig_cookie.storage == ArgOnStack);

			/*
			 * mono_ArgIterator_Setup assumes the signature cookie is
			 * passed first and all the arguments which were before it are
			 * passed on the stack after the signature. So compensate by
			 * passing a different signature.
			 */
			tmp_sig = mono_metadata_signature_dup (call->signature);
			tmp_sig->param_count -= call->signature->sentinelpos;
			tmp_sig->sentinelpos = 0;
			memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

			MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
			sig_arg->inst_p0 = tmp_sig;

			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			arg->inst_left = sig_arg;
			arg->type = STACK_PTR;

			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
		}

		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
		} else {
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;

			if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
				gint align;
				guint32 size;

				if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
					size = sizeof (MonoTypedRef);
					align = sizeof (gpointer);
				}
				else if (sig->pinvoke)
					size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
				else {
					/*
					 * Other backends use mono_type_stack_size (), but that
					 * aligns the size to 8, which is larger than the size of
					 * the source, leading to reads of invalid memory if the
					 * source is at the end of address space.
					 */
					size = mono_class_value_size (in->klass, &align);
				}
				if (ainfo->storage == ArgValuetypeInReg) {
					if (ainfo->pair_storage [1] == ArgNone) {
						MonoInst *load;

						/* Simpler case */

						MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
						load->inst_left = in;

						add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);
					}
					else {
						/* Trees can't be shared so make a copy */
						MonoInst *vtaddr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
						MonoInst *load, *load2, *offset_ins;

						/* Reg1 */
						MONO_INST_NEW (cfg, load, CEE_LDIND_I);
						load->ssa_op = MONO_SSA_LOAD;
						load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

						NEW_ICONST (cfg, offset_ins, 0);
						MONO_INST_NEW (cfg, load2, CEE_ADD);
						load2->inst_left = load;
						load2->inst_right = offset_ins;

						MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
						load->inst_left = load2;

						add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);

						/* Reg2 */
						MONO_INST_NEW (cfg, load, CEE_LDIND_I);
						load->ssa_op = MONO_SSA_LOAD;
						load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

						NEW_ICONST (cfg, offset_ins, 8);
						MONO_INST_NEW (cfg, load2, CEE_ADD);
						load2->inst_left = load;
						load2->inst_right = offset_ins;

						MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [1]));
						load->inst_left = load2;

						MONO_INST_NEW (cfg, arg, OP_OUTARG);
						arg->cil_code = in->cil_code;
						arg->type = in->type;
						/* prepend, so they get reversed */
						arg->next = call->out_args;
						call->out_args = arg;

						add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1], load);

						/* Prepend a copy inst */
						MONO_INST_NEW (cfg, arg, CEE_STIND_I);
						arg->cil_code = in->cil_code;
						arg->ssa_op = MONO_SSA_STORE;
						arg->inst_left = vtaddr;
						arg->inst_right = in;
						arg->type = in->type;

						/* prepend, so they get reversed */
						arg->next = call->out_args;
						call->out_args = arg;
					}
				}
				else {
					arg->opcode = OP_OUTARG_VT;
					arg->klass = in->klass;
					arg->unused = sig->pinvoke;
					arg->inst_imm = size;
				}
			}
			else {
				switch (ainfo->storage) {
				case ArgInIReg:
					add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
					break;
				case ArgInFloatSSEReg:
				case ArgInDoubleSSEReg:
					add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
					break;
				case ArgOnStack:
					arg->opcode = OP_OUTARG;
					if (!sig->params [i - sig->hasthis]->byref) {
						if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4)
							arg->opcode = OP_OUTARG_R4;
						else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8)
							arg->opcode = OP_OUTARG_R8;
					}
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
	}

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
		/* prepend, so they get reversed */
		arg->next = call->out_args;
		call->out_args = arg;
	}

	call->stack_usage = cinfo->stack_usage;
	cfg->param_area = MAX (cfg->param_area, call->stack_usage);
	cfg->flags |= MONO_CFG_HAS_CALLS;

	g_free (cinfo);

	return call;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}

/* emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
	do {                                                        \
		mono_add_patch_info (cfg, code - cfg->native_code,  \
				    MONO_PATCH_INFO_EXC, exc_name); \
		x86_branch32 (code, cond, 0, signed);               \
	} while (0)

#define EMIT_FPCOMPARE(code) do { \
	amd64_fcompp (code); \
	amd64_fnstsw (code); \
} while (0)

#define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
	amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
	amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
	amd64_ ##op (code); \
	amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
	amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
} while (0)
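
/*
 * EMIT_SSE2_FPFUNC bounces an XMM value through the x87 stack because
 * opcodes like fsin only exist there.  Expanded for one use (illustrative),
 * EMIT_SSE2_FPFUNC (code, fsin, xmm0, xmm0) stores %xmm0 below %rsp, runs
 * fld/fsin/fstp on that slot, then loads the result back into %xmm0.
 */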

static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);

	if (cfg->compile_aot) {
		amd64_call_membase (code, AMD64_RIP, 0);
	}
	else {
		gboolean near_call = FALSE;

		/*
		 * Indirect calls are expensive so try to make a near call if possible.
		 * The caller memory is allocated by the code manager so it is
		 * guaranteed to be at a 32 bit offset.
		 */

		if (patch_type != MONO_PATCH_INFO_ABS) {
			/* The target is in memory allocated using the code manager */
			near_call = TRUE;

			if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
				if (((MonoMethod*)data)->klass->image->assembly->aot_module)
					/* The callee might be an AOT method */
					near_call = FALSE;
			}

			if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
				/*
				 * The call might go directly to a native function without
				 * the wrapper.
				 */
				MonoJitICallInfo *mi = mono_find_jit_icall_by_name (data);
				if (mi) {
					gconstpointer target = mono_icall_get_wrapper (mi);
					if ((((guint64)target) >> 32) != 0)
						near_call = FALSE;
				}
			}
		}
		else {
			if (mono_find_class_init_trampoline_by_addr (data))
				near_call = TRUE;
			else {
				MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
				if (info) {
					if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) &&
						strstr (cfg->method->name, info->name)) {
						/* A call to the wrapped function */
						if ((((guint64)data) >> 32) == 0)
							near_call = TRUE;
					}
					else if (info->func == info->wrapper) {
						/* No wrapper */
						if ((((guint64)info->func) >> 32) == 0)
							near_call = TRUE;
					}
					else
						near_call = TRUE;
				}
				else if ((((guint64)data) >> 32) == 0)
					near_call = TRUE;
			}
		}

		if (cfg->method->dynamic)
			/* These methods are allocated using malloc */
			near_call = FALSE;

		if (near_call) {
			amd64_call_code (code, 0);
		}
		else {
			amd64_set_reg_template (code, GP_SCRATCH_REG);
			amd64_call_reg (code, GP_SCRATCH_REG);
		}
	}

	return code;
}
1339
1340 /* FIXME: Add more instructions */
1341 #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI8_MEMBASE_REG) || ((ins)->opcode == OP_MOVE) || ((ins)->opcode == OP_ICONST) || ((ins)->opcode == OP_I8CONST) || ((ins)->opcode == OP_LOAD_MEMBASE))
1342
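/*
 * peephole_pass:
 *
 *   Perform simple peephole optimizations on the instruction list of BB:
 * zero constants become xor reg,reg, compares against zero become tests,
 * and redundant load/store and move pairs are folded away.
 */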
1343 static void
1344 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1345 {
1346         MonoInst *ins, *last_ins = NULL;
1347         ins = bb->code;
1348
1349         while (ins) {
1350
1351                 switch (ins->opcode) {
1352                 case OP_ICONST:
1353                 case OP_I8CONST:
1354                         /* reg = 0 -> XOR (reg, reg) */
                        /* XOR sets cflags on x86, so we can't do it always */
1356                         if (ins->inst_c0 == 0 && (ins->next && INST_IGNORES_CFLAGS (ins->next))) {
1357                                 ins->opcode = CEE_XOR;
1358                                 ins->sreg1 = ins->dreg;
1359                                 ins->sreg2 = ins->dreg;
1360                         }
1361                         break;
1362                 case OP_MUL_IMM: 
1363                         /* remove unnecessary multiplication with 1 */
1364                         if (ins->inst_imm == 1) {
1365                                 if (ins->dreg != ins->sreg1) {
1366                                         ins->opcode = OP_MOVE;
1367                                 } else {
                                        if (last_ins)
                                                last_ins->next = ins->next;
1369                                         ins = ins->next;
1370                                         continue;
1371                                 }
1372                         }
1373                         break;
1374                 case OP_COMPARE_IMM:
1375                         /* OP_COMPARE_IMM (reg, 0) 
1376                          * --> 
1377                          * OP_AMD64_TEST_NULL (reg) 
1378                          */
1379                         if (!ins->inst_imm)
1380                                 ins->opcode = OP_AMD64_TEST_NULL;
1381                         break;
1382                 case OP_ICOMPARE_IMM:
1383                         if (!ins->inst_imm)
1384                                 ins->opcode = OP_X86_TEST_NULL;
1385                         break;
1386                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
1387                         /* 
1388                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1389                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1390                          * -->
1391                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1392                          * OP_COMPARE_IMM reg, imm
1393                          *
                         * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
1395                          */
1396                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1397                             ins->inst_basereg == last_ins->inst_destbasereg &&
1398                             ins->inst_offset == last_ins->inst_offset) {
1399                                         ins->opcode = OP_ICOMPARE_IMM;
1400                                         ins->sreg1 = last_ins->sreg1;
1401
                                        /* check if we can replace cmp reg,0 with test reg,reg */
1403                                         if (!ins->inst_imm)
1404                                                 ins->opcode = OP_X86_TEST_NULL;
1405                                 }
1406
1407                         break;
1408                 case OP_LOAD_MEMBASE:
1409                 case OP_LOADI4_MEMBASE:
1410                         /* 
1411                          * Note: if reg1 = reg2 the load op is removed
1412                          *
1413                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1414                          * OP_LOAD_MEMBASE offset(basereg), reg2
1415                          * -->
1416                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1417                          * OP_MOVE reg1, reg2
1418                          */
1419                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1420                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1421                             ins->inst_basereg == last_ins->inst_destbasereg &&
1422                             ins->inst_offset == last_ins->inst_offset) {
1423                                 if (ins->dreg == last_ins->sreg1) {
1424                                         last_ins->next = ins->next;                             
1425                                         ins = ins->next;                                
1426                                         continue;
1427                                 } else {
1428                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1429                                         ins->opcode = OP_MOVE;
1430                                         ins->sreg1 = last_ins->sreg1;
1431                                 }
1432
1433                         /* 
1434                          * Note: reg1 must be different from the basereg in the second load
                         * Note: if reg1 = reg2 the second load is removed
1436                          *
1437                          * OP_LOAD_MEMBASE offset(basereg), reg1
1438                          * OP_LOAD_MEMBASE offset(basereg), reg2
1439                          * -->
1440                          * OP_LOAD_MEMBASE offset(basereg), reg1
1441                          * OP_MOVE reg1, reg2
1442                          */
                        } else if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1444                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1445                               ins->inst_basereg != last_ins->dreg &&
1446                               ins->inst_basereg == last_ins->inst_basereg &&
1447                               ins->inst_offset == last_ins->inst_offset) {
1448
1449                                 if (ins->dreg == last_ins->dreg) {
1450                                         last_ins->next = ins->next;                             
1451                                         ins = ins->next;                                
1452                                         continue;
1453                                 } else {
1454                                         ins->opcode = OP_MOVE;
1455                                         ins->sreg1 = last_ins->dreg;
1456                                 }
1457
1458                                 //g_assert_not_reached ();
1459
1460 #if 0
1461                         /* 
1462                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1463                          * OP_LOAD_MEMBASE offset(basereg), reg
1464                          * -->
1465                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1466                          * OP_ICONST reg, imm
1467                          */
1468                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1469                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1470                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1471                                    ins->inst_offset == last_ins->inst_offset) {
1472                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1473                                 ins->opcode = OP_ICONST;
1474                                 ins->inst_c0 = last_ins->inst_imm;
1475                                 g_assert_not_reached (); // check this rule
1476 #endif
1477                         }
1478                         break;
1479                 case OP_LOADU1_MEMBASE:
1480                 case OP_LOADI1_MEMBASE:
1481                         /* 
1482                          * Note: if reg1 = reg2 the load op is removed
1483                          *
1484                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1485                          * OP_LOAD_MEMBASE offset(basereg), reg2
1486                          * -->
1487                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1488                          * OP_MOVE reg1, reg2
1489                          */
1490                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1491                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1492                                         ins->inst_offset == last_ins->inst_offset) {
1493                                 if (ins->dreg == last_ins->sreg1) {
1494                                         last_ins->next = ins->next;                             
1495                                         ins = ins->next;                                
1496                                         continue;
1497                                 } else {
1498                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1499                                         ins->opcode = OP_MOVE;
1500                                         ins->sreg1 = last_ins->sreg1;
1501                                 }
1502                         }
1503                         break;
1504                 case OP_LOADU2_MEMBASE:
1505                 case OP_LOADI2_MEMBASE:
1506                         /* 
1507                          * Note: if reg1 = reg2 the load op is removed
1508                          *
1509                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1510                          * OP_LOAD_MEMBASE offset(basereg), reg2
1511                          * -->
1512                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1513                          * OP_MOVE reg1, reg2
1514                          */
1515                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1516                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1517                                         ins->inst_offset == last_ins->inst_offset) {
1518                                 if (ins->dreg == last_ins->sreg1) {
1519                                         last_ins->next = ins->next;                             
1520                                         ins = ins->next;                                
1521                                         continue;
1522                                 } else {
1523                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1524                                         ins->opcode = OP_MOVE;
1525                                         ins->sreg1 = last_ins->sreg1;
1526                                 }
1527                         }
1528                         break;
1529                 case CEE_CONV_I4:
1530                 case CEE_CONV_U4:
1531                 case OP_MOVE:
1532                         /*
1533                          * Removes:
1534                          *
1535                          * OP_MOVE reg, reg 
1536                          */
1537                         if (ins->dreg == ins->sreg1) {
1538                                 if (last_ins)
1539                                         last_ins->next = ins->next;                             
1540                                 ins = ins->next;
1541                                 continue;
1542                         }
1543                         /* 
1544                          * Removes:
1545                          *
1546                          * OP_MOVE sreg, dreg 
1547                          * OP_MOVE dreg, sreg
1548                          */
1549                         if (last_ins && last_ins->opcode == OP_MOVE &&
1550                             ins->sreg1 == last_ins->dreg &&
1551                             ins->dreg == last_ins->sreg1) {
1552                                 last_ins->next = ins->next;                             
1553                                 ins = ins->next;                                
1554                                 continue;
1555                         }
1556                         break;
1557                 }
1558                 last_ins = ins;
1559                 ins = ins->next;
1560         }
1561         bb->last_ins = last_ins;
1562 }
1563
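/*
 * insert_after_ins:
 *
 *   Insert TO_INSERT after INS in the instruction list of BB, or at the
 * head of the list if INS is NULL.
 */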
1564 static void
1565 insert_after_ins (MonoBasicBlock *bb, MonoInst *ins, MonoInst *to_insert)
1566 {
1567         if (ins == NULL) {
1568                 ins = bb->code;
1569                 bb->code = to_insert;
1570                 to_insert->next = ins;
1571         }
1572         else {
1573                 to_insert->next = ins->next;
1574                 ins->next = to_insert;
1575         }
1576 }
1577
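/*
 * Allocate a new instruction with opcode OP from the cfg mempool and insert
 * it into the current basic block after last_ins, i.e. just before the
 * instruction being lowered.
 */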
1578 #define NEW_INS(cfg,dest,op) do {       \
1579                 (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
1580                 (dest)->opcode = (op);  \
                insert_after_ins (bb, last_ins, (dest)); \
1582         } while (0)
1583
1584 /*
1585  * mono_arch_lowering_pass:
1586  *
1587  *  Converts complex opcodes into simpler ones so that each IR instruction
1588  * corresponds to one machine instruction.
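 *
 * For example, idiv has no immediate form, so OP_DIV_IMM is rewritten as
 *
 *   OP_DIV_IMM  dreg <- sreg1, imm
 *   -->
 *   OP_ICONST   temp <- imm
 *   OP_LDIV     dreg <- sreg1, temp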
1589  */
1590 static void
1591 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1592 {
1593         MonoInst *ins, *temp, *last_ins = NULL;
1594         ins = bb->code;
1595
1596         if (bb->max_ireg > cfg->rs->next_vireg)
1597                 cfg->rs->next_vireg = bb->max_ireg;
1598         if (bb->max_freg > cfg->rs->next_vfreg)
1599                 cfg->rs->next_vfreg = bb->max_freg;
1600
1601         /*
1602          * FIXME: Need to add more instructions, but the current machine 
1603          * description can't model some parts of the composite instructions like
1604          * cdq.
1605          */
1606         while (ins) {
1607                 switch (ins->opcode) {
1608                 case OP_DIV_IMM:
1609                 case OP_REM_IMM:
1610                 case OP_IDIV_IMM:
1611                 case OP_IREM_IMM:
1612                         NEW_INS (cfg, temp, OP_ICONST);
1613                         temp->inst_c0 = ins->inst_imm;
1614                         temp->dreg = mono_regstate_next_int (cfg->rs);
1615                         switch (ins->opcode) {
1616                         case OP_DIV_IMM:
1617                                 ins->opcode = OP_LDIV;
1618                                 break;
1619                         case OP_REM_IMM:
1620                                 ins->opcode = OP_LREM;
1621                                 break;
1622                         case OP_IDIV_IMM:
1623                                 ins->opcode = OP_IDIV;
1624                                 break;
1625                         case OP_IREM_IMM:
1626                                 ins->opcode = OP_IREM;
1627                                 break;
1628                         }
1629                         ins->sreg2 = temp->dreg;
1630                         break;
1631                 case OP_COMPARE_IMM:
1632                         if (!amd64_is_imm32 (ins->inst_imm)) {
1633                                 NEW_INS (cfg, temp, OP_I8CONST);
1634                                 temp->inst_c0 = ins->inst_imm;
1635                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1636                                 ins->opcode = OP_COMPARE;
1637                                 ins->sreg2 = temp->dreg;
1638                         }
1639                         break;
1640                 case OP_LOAD_MEMBASE:
1641                 case OP_LOADI8_MEMBASE:
1642                         if (!amd64_is_imm32 (ins->inst_offset)) {
1643                                 NEW_INS (cfg, temp, OP_I8CONST);
1644                                 temp->inst_c0 = ins->inst_offset;
1645                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1646                                 ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
1647                                 ins->inst_indexreg = temp->dreg;
1648                         }
1649                         break;
1650                 case OP_STORE_MEMBASE_IMM:
1651                 case OP_STOREI8_MEMBASE_IMM:
1652                         if (!amd64_is_imm32 (ins->inst_imm)) {
1653                                 NEW_INS (cfg, temp, OP_I8CONST);
1654                                 temp->inst_c0 = ins->inst_imm;
1655                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1656                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
1657                                 ins->sreg1 = temp->dreg;
1658                         }
1659                         break;
1660                 default:
1661                         break;
1662                 }
1663                 last_ins = ins;
1664                 ins = ins->next;
1665         }
1666         bb->last_ins = last_ins;
1667
1668         bb->max_ireg = cfg->rs->next_vireg;
1669         bb->max_freg = cfg->rs->next_vfreg;
1670 }
1671
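/*
 * Condition codes for the conditional branch opcodes: the signed branches
 * (CEE_BEQ..CEE_BLT) first, then their unsigned counterparts, then the
 * overflow/carry checks.
 */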
1672 static const int 
1673 branch_cc_table [] = {
1674         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1675         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1676         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1677 };
1678
1679 static int
1680 opcode_to_x86_cond (int opcode)
1681 {
1682         switch (opcode) {
1683         case OP_IBEQ:
1684                 return X86_CC_EQ;
1685         case OP_IBNE_UN:
1686                 return X86_CC_NE;
1687         case OP_IBLT:
1688                 return X86_CC_LT;
1689         case OP_IBLT_UN:
1690                 return X86_CC_LT;
1691         case OP_IBGT:
1692                 return X86_CC_GT;
1693         case OP_IBGT_UN:
1694                 return X86_CC_GT;
1695         case OP_IBGE:
1696                 return X86_CC_GE;
1697         case OP_IBGE_UN:
1698                 return X86_CC_GE;
1699         case OP_IBLE:
1700                 return X86_CC_LE;
1701         case OP_IBLE_UN:
1702                 return X86_CC_LE;
1703         case OP_COND_EXC_IOV:
1704                 return X86_CC_O;
1705         case OP_COND_EXC_IC:
1706                 return X86_CC_C;
1707         default:
1708                 g_assert_not_reached ();
1709         }
1710
1711         return -1;
1712 }
1713
1714 /*#include "cprop.c"*/
1715
1716 /*
1717  * Local register allocation.
1718  * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set, etc.).
1720  * We also reverse the list of instructions (in the InstList list) because assigning
1721  * registers backwards allows for more tricks to be used.
1722  */
1723 void
1724 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1725 {
1726         if (!bb->code)
1727                 return;
1728
1729         mono_arch_lowering_pass (cfg, bb);
1730
1731         mono_local_regalloc (cfg, bb);
1732 }
1733
1734 static unsigned char*
1735 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
1736 {
1737         if (use_sse2) {
1738                 amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
1739         }
1740         else {
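                /*
                 * Without SSE2 we have to convert on the x87 stack: fist rounds
                 * according to the FPU control word, so save it, set the rounding
                 * control bits (0xc00) to round-toward-zero, convert, and restore
                 * the original control word afterwards.
                 */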
1741                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
1742                 x86_fnstcw_membase(code, AMD64_RSP, 0);
1743                 amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2);
1744                 amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1745                 amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2);
1746                 amd64_fldcw_membase (code, AMD64_RSP, 2);
1747                 amd64_push_reg (code, AMD64_RAX); // SP = SP - 8
1748                 amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8);
1749                 amd64_pop_reg (code, dreg);
1750                 amd64_fldcw_membase (code, AMD64_RSP, 0);
1751                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
1752         }
1753
1754         if (size == 1)
1755                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
1756         else if (size == 2)
1757                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
1758         return code;
1759 }
1760
1761 static unsigned char*
1762 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1763 {
1764         int sreg = tree->sreg1;
1765         int need_touch = FALSE;
1766
1767 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
        if (!(tree->flags & MONO_INST_INIT))
1769                 need_touch = TRUE;
1770 #endif
1771
1772         if (need_touch) {
1773                 guint8* br[5];
1774
1775                 /*
1776                  * Under Windows:
1777                  * If requested stack size is larger than one page,
1778                  * perform stack-touch operation
1779                  */
1780                 /*
1781                  * Generate stack probe code.
1782                  * Under Windows, it is necessary to allocate one page at a time,
1783                  * "touching" stack after each successful sub-allocation. This is
1784                  * because of the way stack growth is implemented - there is a
                 * guard page before the lowest stack page that is currently committed.
                 * The stack normally grows sequentially, so the OS traps access to the
1787                  * guard page and commits more pages when needed.
1788                  */
1789                 amd64_test_reg_imm (code, sreg, ~0xFFF);
1790                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1791
1792                 br[2] = code; /* loop */
1793                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
1794                 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
1795                 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1796                 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1797                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1798                 amd64_patch (br[3], br[2]);
1799                 amd64_test_reg_reg (code, sreg, sreg);
1800                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1801                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
1802
1803                 br[1] = code; x86_jump8 (code, 0);
1804
1805                 amd64_patch (br[0], code);
1806                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
1807                 amd64_patch (br[1], code);
1808                 amd64_patch (br[4], code);
1809         }
1810         else
1811                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
1812
1813         if (tree->flags & MONO_INST_INIT) {
1814                 int offset = 0;
1815                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
1816                         amd64_push_reg (code, AMD64_RAX);
1817                         offset += 8;
1818                 }
1819                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
1820                         amd64_push_reg (code, AMD64_RCX);
1821                         offset += 8;
1822                 }
1823                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
1824                         amd64_push_reg (code, AMD64_RDI);
1825                         offset += 8;
1826                 }
1827                 
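                /*
                 * Zero out the newly allocated area with rep stos: RAX holds 0,
                 * RCX the iteration count derived from the size, RDI the start
                 * address.
                 */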
1828                 amd64_shift_reg_imm (code, X86_SHR, sreg, 4);
1829                 if (sreg != AMD64_RCX)
1830                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
1831                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
1832                                 
1833                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
1834                 amd64_cld (code);
1835                 amd64_prefix (code, X86_REP_PREFIX);
1836                 amd64_stosl (code);
1837                 
1838                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
1839                         amd64_pop_reg (code, AMD64_RDI);
1840                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
1841                         amd64_pop_reg (code, AMD64_RCX);
1842                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
1843                         amd64_pop_reg (code, AMD64_RAX);
1844         }
1845         return code;
1846 }
1847
1848 static guint8*
1849 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1850 {
1851         CallInfo *cinfo;
1852         guint32 quad;
1853
1854         /* Move return value to the target register */
1855         /* FIXME: do this in the local reg allocator */
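        /*
         * Integer and long values are returned in RAX, floating point values
         * in XMM0 (moved to the x87 stack when SSE2 is not used), and small
         * value types in the register pair described by cinfo->ret.
         */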
1856         switch (ins->opcode) {
1857         case CEE_CALL:
1858         case OP_CALL_REG:
1859         case OP_CALL_MEMBASE:
1860         case OP_LCALL:
1861         case OP_LCALL_REG:
1862         case OP_LCALL_MEMBASE:
1863                 g_assert (ins->dreg == AMD64_RAX);
1864                 break;
1865         case OP_FCALL:
1866         case OP_FCALL_REG:
1867         case OP_FCALL_MEMBASE:
1868                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
1869                         if (use_sse2)
1870                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
1871                         else {
1872                                 /* FIXME: optimize this */
1873                                 amd64_movss_membase_reg (code, AMD64_RSP, -8, AMD64_XMM0);
1874                                 amd64_fld_membase (code, AMD64_RSP, -8, FALSE);
1875                         }
1876                 }
1877                 else {
1878                         if (use_sse2) {
1879                                 if (ins->dreg != AMD64_XMM0)
1880                                         amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
1881                         }
1882                         else {
1883                                 /* FIXME: optimize this */
1884                                 amd64_movsd_membase_reg (code, AMD64_RSP, -8, AMD64_XMM0);
1885                                 amd64_fld_membase (code, AMD64_RSP, -8, TRUE);
1886                         }
1887                 }
1888                 break;
1889         case OP_VCALL:
1890         case OP_VCALL_REG:
1891         case OP_VCALL_MEMBASE:
1892                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1893                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1894                         /* Pop the destination address from the stack */
1895                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
1896                         amd64_pop_reg (code, AMD64_RCX);
1897                         
1898                         for (quad = 0; quad < 2; quad ++) {
1899                                 switch (cinfo->ret.pair_storage [quad]) {
1900                                 case ArgInIReg:
1901                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
1902                                         break;
1903                                 case ArgInFloatSSEReg:
1904                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
1905                                         break;
1906                                 case ArgInDoubleSSEReg:
1907                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
1908                                         break;
1909                                 case ArgNone:
1910                                         break;
1911                                 default:
1912                                         NOT_IMPLEMENTED;
1913                                 }
1914                         }
1915                 }
1916                 g_free (cinfo);
1917                 break;
1918         }
1919
1920         return code;
1921 }
1922
1923 /*
1924  * emit_load_volatile_arguments:
1925  *
1926  *  Load volatile arguments from the stack to the original input registers.
1927  * Required before a tail call.
1928  */
1929 static guint8*
1930 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1931 {
1932         MonoMethod *method = cfg->method;
1933         MonoMethodSignature *sig;
1934         MonoInst *inst;
1935         CallInfo *cinfo;
1936         guint32 i;
1937
1938         /* FIXME: Generate intermediate code instead */
1939
1940         sig = mono_method_signature (method);
1941
1942         cinfo = get_call_info (sig, FALSE);
1943         
1944         /* This is the opposite of the code in emit_prolog */
1945
1946         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1947                 ArgInfo *ainfo = cinfo->args + i;
1948                 MonoType *arg_type;
1949                 inst = cfg->varinfo [i];
1950
1951                 if (sig->hasthis && (i == 0))
1952                         arg_type = &mono_defaults.object_class->byval_arg;
1953                 else
1954                         arg_type = sig->params [i - sig->hasthis];
1955
1956                 if (inst->opcode != OP_REGVAR) {
1957                         switch (ainfo->storage) {
1958                         case ArgInIReg: {
1959                                 guint32 size = 8;
1960
1961                                 /* FIXME: I1 etc */
1962                                 amd64_mov_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset, size);
1963                                 break;
1964                         }
1965                         case ArgInFloatSSEReg:
1966                                 amd64_movss_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
1967                                 break;
1968                         case ArgInDoubleSSEReg:
1969                                 amd64_movsd_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
1970                                 break;
1971                         default:
1972                                 break;
1973                         }
1974                 }
1975         }
1976
1977         g_free (cinfo);
1978
1979         return code;
1980 }
1981
1982 #define REAL_PRINT_REG(text,reg) \
1983 mono_assert (reg >= 0); \
1984 amd64_push_reg (code, AMD64_RAX); \
1985 amd64_push_reg (code, AMD64_RDX); \
1986 amd64_push_reg (code, AMD64_RCX); \
1987 amd64_push_reg (code, reg); \
1988 amd64_push_imm (code, reg); \
1989 amd64_push_imm (code, text " %d %p\n"); \
1990 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
1991 amd64_call_reg (code, AMD64_RAX); \
amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); /* pop the three 8-byte arguments */ \
1993 amd64_pop_reg (code, AMD64_RCX); \
1994 amd64_pop_reg (code, AMD64_RDX); \
1995 amd64_pop_reg (code, AMD64_RAX);
1996
1997 /* benchmark and set based on cpu */
1998 #define LOOP_ALIGNMENT 8
1999 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2000
2001 void
2002 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2003 {
2004         MonoInst *ins;
2005         MonoCallInst *call;
2006         guint offset;
2007         guint8 *code = cfg->native_code + cfg->code_len;
2008         MonoInst *last_ins = NULL;
2009         guint last_offset = 0;
2010         int max_len, cpos;
2011
2012         if (cfg->opt & MONO_OPT_PEEPHOLE)
2013                 peephole_pass (cfg, bb);
2014
2015         if (cfg->opt & MONO_OPT_LOOP) {
2016                 int pad, align = LOOP_ALIGNMENT;
2017                 /* set alignment depending on cpu */
2018                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2019                         pad = align - pad;
2020                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2021                         amd64_padding (code, pad);
2022                         cfg->code_len += pad;
2023                         bb->native_offset = cfg->code_len;
2024                 }
2025         }
2026
2027         if (cfg->verbose_level > 2)
2028                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2029
2030         cpos = bb->max_offset;
2031
2032         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2033                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2034                 g_assert (!cfg->compile_aot);
2035                 cpos += 6;
2036
2037                 cov->data [bb->dfn].cil_code = bb->cil_code;
                /* this is not thread safe, but good enough */
2039                 amd64_inc_mem (code, (guint64)&cov->data [bb->dfn].count); 
2040         }
2041
2042         offset = code - cfg->native_code;
2043
2044         ins = bb->code;
2045         while (ins) {
2046                 offset = code - cfg->native_code;
2047
2048                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2049
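                /* Grow the native code buffer if fewer than max_len + 16 bytes remain */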
2050                 if (offset > (cfg->code_size - max_len - 16)) {
2051                         cfg->code_size *= 2;
2052                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2053                         code = cfg->native_code + offset;
2054                         mono_jit_stats.code_reallocs++;
2055                 }
2056
2057                 mono_debug_record_line_number (cfg, ins, offset);
2058
2059                 switch (ins->opcode) {
2060                 case OP_BIGMUL:
2061                         amd64_mul_reg (code, ins->sreg2, TRUE);
2062                         break;
2063                 case OP_BIGMUL_UN:
2064                         amd64_mul_reg (code, ins->sreg2, FALSE);
2065                         break;
2066                 case OP_X86_SETEQ_MEMBASE:
2067                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2068                         break;
2069                 case OP_STOREI1_MEMBASE_IMM:
2070                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2071                         break;
2072                 case OP_STOREI2_MEMBASE_IMM:
2073                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2074                         break;
2075                 case OP_STOREI4_MEMBASE_IMM:
2076                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2077                         break;
2078                 case OP_STOREI1_MEMBASE_REG:
2079                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2080                         break;
2081                 case OP_STOREI2_MEMBASE_REG:
2082                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2083                         break;
2084                 case OP_STORE_MEMBASE_REG:
2085                 case OP_STOREI8_MEMBASE_REG:
2086                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2087                         break;
2088                 case OP_STOREI4_MEMBASE_REG:
2089                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2090                         break;
2091                 case OP_STORE_MEMBASE_IMM:
2092                 case OP_STOREI8_MEMBASE_IMM:
2093                         g_assert (amd64_is_imm32 (ins->inst_imm));
2094                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2095                         break;
2096                 case CEE_LDIND_I:
2097                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, sizeof (gpointer));
2098                         break;
2099                 case CEE_LDIND_I4:
2100                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2101                         break;
2102                 case CEE_LDIND_U4:
2103                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2104                         break;
2105                 case OP_LOADU4_MEM:
2106                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2107                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2108                         break;
2109                 case OP_LOAD_MEMBASE:
2110                 case OP_LOADI8_MEMBASE:
2111                         g_assert (amd64_is_imm32 (ins->inst_offset));
2112                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2113                         break;
2114                 case OP_LOADI4_MEMBASE:
2115                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2116                         break;
2117                 case OP_LOADU4_MEMBASE:
2118                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2119                         break;
2120                 case OP_LOADU1_MEMBASE:
2121                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2122                         break;
2123                 case OP_LOADI1_MEMBASE:
2124                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2125                         break;
2126                 case OP_LOADU2_MEMBASE:
2127                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2128                         break;
2129                 case OP_LOADI2_MEMBASE:
2130                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2131                         break;
2132                 case OP_AMD64_LOADI8_MEMINDEX:
2133                         amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
2134                         break;
2135                 case CEE_CONV_I1:
2136                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2137                         break;
2138                 case CEE_CONV_I2:
2139                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2140                         break;
2141                 case CEE_CONV_U1:
2142                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2143                         break;
2144                 case CEE_CONV_U2:
2145                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2146                         break;
2147                 case CEE_CONV_U8:
2148                 case CEE_CONV_U:
                        /* Clean out the upper 32 bits (a 32 bit mov zero-extends) */
2150                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2151                         break;
2152                 case CEE_CONV_I8:
2153                 case CEE_CONV_I:
2154                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2155                         break;                  
2156                 case OP_COMPARE:
2157                 case OP_LCOMPARE:
2158                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2159                         break;
2160                 case OP_COMPARE_IMM:
2161                         g_assert (amd64_is_imm32 (ins->inst_imm));
2162                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2163                         break;
2164                 case OP_X86_COMPARE_REG_MEMBASE:
2165                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2166                         break;
2167                 case OP_X86_TEST_NULL:
2168                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
2169                         break;
2170                 case OP_AMD64_TEST_NULL:
2171                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
2172                         break;
2173                 case OP_X86_ADD_MEMBASE_IMM:
2174                         /* FIXME: Make a 64 version too */
2175                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2176                         break;
2177                 case OP_X86_ADD_MEMBASE:
2178                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2179                         break;
2180                 case OP_X86_SUB_MEMBASE_IMM:
2181                         g_assert (amd64_is_imm32 (ins->inst_imm));
2182                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2183                         break;
2184                 case OP_X86_SUB_MEMBASE:
2185                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2186                         break;
2187                 case OP_X86_INC_MEMBASE:
2188                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
2189                         break;
2190                 case OP_X86_INC_REG:
2191                         amd64_inc_reg_size (code, ins->dreg, 4);
2192                         break;
2193                 case OP_X86_DEC_MEMBASE:
2194                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
2195                         break;
2196                 case OP_X86_DEC_REG:
2197                         amd64_dec_reg_size (code, ins->dreg, 4);
2198                         break;
2199                 case OP_X86_MUL_MEMBASE:
2200                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2201                         break;
2202                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
2203                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
2204                         break;
2205                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
2206                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
2207                         break;
2208                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
2209                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
2210                         break;
2211                 case CEE_BREAK:
2212                         amd64_breakpoint (code);
2213                         break;
2214                 case OP_ADDCC:
2215                 case CEE_ADD:
2216                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2217                         break;
2218                 case OP_ADC:
2219                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2220                         break;
2221                 case OP_ADD_IMM:
2222                         g_assert (amd64_is_imm32 (ins->inst_imm));
2223                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2224                         break;
2225                 case OP_ADC_IMM:
2226                         g_assert (amd64_is_imm32 (ins->inst_imm));
2227                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2228                         break;
2229                 case OP_SUBCC:
2230                 case CEE_SUB:
2231                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2232                         break;
2233                 case OP_SBB:
2234                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2235                         break;
2236                 case OP_SUB_IMM:
2237                         g_assert (amd64_is_imm32 (ins->inst_imm));
2238                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2239                         break;
2240                 case OP_SBB_IMM:
2241                         g_assert (amd64_is_imm32 (ins->inst_imm));
2242                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2243                         break;
2244                 case CEE_AND:
2245                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2246                         break;
2247                 case OP_AND_IMM:
2248                         g_assert (amd64_is_imm32 (ins->inst_imm));
2249                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2250                         break;
2251                 case CEE_MUL:
2252                 case OP_LMUL:
2253                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2254                         break;
2255                 case OP_MUL_IMM:
2256                 case OP_LMUL_IMM:
2257                         amd64_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2258                         break;
2259                 case CEE_DIV:
2260                 case OP_LDIV:
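                        /* cqo: sign-extend RAX into RDX:RAX for the signed divide */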
2261                         amd64_cdq (code);
2262                         amd64_div_reg (code, ins->sreg2, TRUE);
2263                         break;
2264                 case CEE_DIV_UN:
2265                 case OP_LDIV_UN:
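                        /* For unsigned division the high part of the dividend in RDX must be zero */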
2266                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2267                         amd64_div_reg (code, ins->sreg2, FALSE);
2268                         break;
2269                 case CEE_REM:
2270                 case OP_LREM:
2271                         amd64_cdq (code);
2272                         amd64_div_reg (code, ins->sreg2, TRUE);
2273                         break;
2274                 case CEE_REM_UN:
2275                 case OP_LREM_UN:
2276                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2277                         amd64_div_reg (code, ins->sreg2, FALSE);
2278                         break;
2279                 case OP_LMUL_OVF:
2280                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2281                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2282                         break;
2283                 case CEE_OR:
2284                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2285                         break;
                case OP_OR_IMM:
                        g_assert (amd64_is_imm32 (ins->inst_imm));
2288                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2289                         break;
2290                 case CEE_XOR:
2291                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2292                         break;
2293                 case OP_XOR_IMM:
2294                         g_assert (amd64_is_imm32 (ins->inst_imm));
2295                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2296                         break;
2297                 case CEE_SHL:
2298                 case OP_LSHL:
2299                         g_assert (ins->sreg2 == AMD64_RCX);
2300                         amd64_shift_reg (code, X86_SHL, ins->dreg);
2301                         break;
2302                 case CEE_SHR:
2303                 case OP_LSHR:
2304                         g_assert (ins->sreg2 == AMD64_RCX);
2305                         amd64_shift_reg (code, X86_SAR, ins->dreg);
2306                         break;
2307                 case OP_SHR_IMM:
2308                         g_assert (amd64_is_imm32 (ins->inst_imm));
2309                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2310                         break;
2311                 case OP_LSHR_IMM:
2312                         g_assert (amd64_is_imm32 (ins->inst_imm));
2313                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2314                         break;
2315                 case OP_SHR_UN_IMM:
2316                         g_assert (amd64_is_imm32 (ins->inst_imm));
2317                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2318                         break;
2319                 case OP_LSHR_UN_IMM:
2320                         g_assert (amd64_is_imm32 (ins->inst_imm));
2321                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2322                         break;
2323                 case CEE_SHR_UN:
2324                         g_assert (ins->sreg2 == AMD64_RCX);
2325                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2326                         break;
2327                 case OP_LSHR_UN:
2328                         g_assert (ins->sreg2 == AMD64_RCX);
2329                         amd64_shift_reg (code, X86_SHR, ins->dreg);
2330                         break;
2331                 case OP_SHL_IMM:
2332                         g_assert (amd64_is_imm32 (ins->inst_imm));
2333                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2334                         break;
2335                 case OP_LSHL_IMM:
2336                         g_assert (amd64_is_imm32 (ins->inst_imm));
2337                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2338                         break;
2339
2340                 case OP_IADDCC:
2341                 case OP_IADD:
2342                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
2343                         break;
2344                 case OP_IADC:
2345                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
2346                         break;
2347                 case OP_IADD_IMM:
2348                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
2349                         break;
2350                 case OP_IADC_IMM:
2351                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
2352                         break;
2353                 case OP_ISUBCC:
2354                 case OP_ISUB:
2355                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
2356                         break;
2357                 case OP_ISBB:
2358                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
2359                         break;
2360                 case OP_ISUB_IMM:
2361                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
2362                         break;
2363                 case OP_ISBB_IMM:
2364                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
2365                         break;
2366                 case OP_IAND:
2367                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
2368                         break;
2369                 case OP_IAND_IMM:
2370                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
2371                         break;
2372                 case OP_IOR:
2373                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
2374                         break;
2375                 case OP_IOR_IMM:
2376                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
2377                         break;
2378                 case OP_IXOR:
2379                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
2380                         break;
2381                 case OP_IXOR_IMM:
2382                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
2383                         break;
2384                 case OP_INEG:
2385                         amd64_neg_reg_size (code, ins->sreg1, 4);
2386                         break;
2387                 case OP_INOT:
2388                         amd64_not_reg_size (code, ins->sreg1, 4);
2389                         break;
2390                 case OP_ISHL:
2391                         g_assert (ins->sreg2 == AMD64_RCX);
2392                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
2393                         break;
2394                 case OP_ISHR:
2395                         g_assert (ins->sreg2 == AMD64_RCX);
2396                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
2397                         break;
2398                 case OP_ISHR_IMM:
2399                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
2400                         break;
2401                 case OP_ISHR_UN_IMM:
2402                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
2403                         break;
2404                 case OP_ISHR_UN:
2405                         g_assert (ins->sreg2 == AMD64_RCX);
2406                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
2407                         break;
2408                 case OP_ISHL_IMM:
2409                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
2410                         break;
2411                 case OP_IMUL:
2412                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2413                         break;
2414                 case OP_IMUL_IMM:
2415                         amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, 4);
2416                         break;
2417                 case OP_IMUL_OVF:
2418                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
2419                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2420                         break;
2421                 case OP_IMUL_OVF_UN:
2422                 case OP_LMUL_OVF_UN: {
2423                         /* the mul operation and the exception check should most likely be split */
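                        /*
                         * mul puts the result in RDX:RAX and sets the overflow and
                         * carry flags when the high half is non-zero, which is the
                         * overflow condition checked below.
                         */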
2424                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2425                         int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
2426                         /*g_assert (ins->sreg2 == X86_EAX);
2427                         g_assert (ins->dreg == X86_EAX);*/
2428                         if (ins->sreg2 == X86_EAX) {
2429                                 non_eax_reg = ins->sreg1;
2430                         } else if (ins->sreg1 == X86_EAX) {
2431                                 non_eax_reg = ins->sreg2;
2432                         } else {
2433                                 /* no need to save since we're going to store to it anyway */
2434                                 if (ins->dreg != X86_EAX) {
2435                                         saved_eax = TRUE;
2436                                         amd64_push_reg (code, X86_EAX);
2437                                 }
2438                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
2439                                 non_eax_reg = ins->sreg2;
2440                         }
2441                         if (ins->dreg == X86_EDX) {
2442                                 if (!saved_eax) {
2443                                         saved_eax = TRUE;
2444                                         amd64_push_reg (code, X86_EAX);
2445                                 }
2446                         } else {
2447                                 saved_edx = TRUE;
2448                                 amd64_push_reg (code, X86_EDX);
2449                         }
2450                         amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
2451                         /* save before the check since pop and mov don't change the flags */
2452                         if (ins->dreg != X86_EAX)
2453                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
2454                         if (saved_edx)
2455                                 amd64_pop_reg (code, X86_EDX);
2456                         if (saved_eax)
2457                                 amd64_pop_reg (code, X86_EAX);
2458                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2459                         break;
2460                 }
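                     /* idiv/div take the dividend in %edx:%eax: cdq sign-extends %eax
                      * into %edx for the signed cases below, while the unsigned cases
                      * clear %edx instead */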
2461                 case OP_IDIV:
2462                         amd64_cdq_size (code, 4);
2463                         amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
2464                         break;
2465                 case OP_IDIV_UN:
2466                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2467                         amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
2468                         break;
2469                 case OP_IREM:
2470                         amd64_cdq_size (code, 4);
2471                         amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
2472                         break;
2473                 case OP_IREM_UN:
2474                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
2475                         amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
2476                         break;
2477                 case OP_ICOMPARE:
2478                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
2479                         break;
2480                 case OP_ICOMPARE_IMM:
2481                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
2482                         break;
2483                 case OP_IBEQ:
2484                 case OP_IBLT:
2485                 case OP_IBGT:
2486                 case OP_IBGE:
2487                 case OP_IBLE:
2488                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), TRUE);
2489                         break;
2490                 case OP_IBNE_UN:
2491                 case OP_IBLT_UN:
2492                 case OP_IBGT_UN:
2493                 case OP_IBGE_UN:
2494                 case OP_IBLE_UN:
2495                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), FALSE);
2496                         break;
2497                 case OP_COND_EXC_IOV:
2498                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
2499                                                                                 TRUE, ins->inst_p1);
2500                         break;
2501                 case OP_COND_EXC_IC:
2502                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
2503                                                                                 FALSE, ins->inst_p1);
2504                         break;
2505                 case CEE_NOT:
2506                         amd64_not_reg (code, ins->sreg1);
2507                         break;
2508                 case CEE_NEG:
2509                         amd64_neg_reg (code, ins->sreg1);
2510                         break;
2511                 case OP_SEXT_I1:
2512                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2513                         break;
2514                 case OP_SEXT_I2:
2515                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2516                         break;
2517                 case OP_SEXT_I4:
2518                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2519                         break;
2520                 case OP_ICONST:
2521                 case OP_I8CONST:
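                             /* mov r32, imm32 zero-extends on amd64 and is much shorter
                              * than the 10-byte movabs, so prefer it whenever the upper
                              * half of the constant is zero */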
2522                         if ((((guint64)ins->inst_c0) >> 32) == 0)
2523                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
2524                         else
2525                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
2526                         break;
2527                 case OP_AOTCONST:
2528                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
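                             /* load through a RIP-relative address with a 0 displacement;
                              * the patch info recorded above fixes it up to the real
                              * target later */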
2529                         amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8);
2530                         break;
2531                 case CEE_CONV_I4:
2532                 case CEE_CONV_U4:
2533                 case OP_MOVE:
2534                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
2535                         break;
2536                 case OP_AMD64_SET_XMMREG_R4: {
2537                         if (use_sse2) {
2538                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
2539                         }
2540                         else {
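                                     /* the source is on the x87 stack: spill it into the
                                      * red zone at -8(%rsp) as single precision and
                                      * reload it as an SSE value */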
2541                                 amd64_fst_membase (code, AMD64_RSP, -8, FALSE, TRUE);
2542                                 /* ins->dreg is set to -1 by the reg allocator */
2543                                 amd64_movss_reg_membase (code, ins->unused, AMD64_RSP, -8);
2544                         }
2545                         break;
2546                 }
2547                 case OP_AMD64_SET_XMMREG_R8: {
2548                         if (use_sse2) {
2549                                 if (ins->dreg != ins->sreg1)
2550                                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
2551                         }
2552                         else {
2553                                 amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE);
2554                                 /* ins->dreg is set to -1 by the reg allocator */
2555                                 amd64_movsd_reg_membase (code, ins->unused, AMD64_RSP, -8);
2556                         }
2557                         break;
2558                 }
2559                 case CEE_JMP: {
2560                         /*
2561                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2562                          * Keep in sync with the code in emit_epilog.
2563                          */
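                             /*
                              * Illustrative emitted sequence, assuming a single saved
                              * callee-saved register (%r12) and a JIT (non-AOT) target:
                              *   lea -8(%rbp), %rsp
                              *   pop %r12
                              *   leave
                              *   mov $<patched target>, %r11
                              *   jmp *%r11
                              */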
2564                         int pos = 0, i;
2565
2566                         /* FIXME: no tracing support... */
2567                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2568                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2569
2570                         g_assert (!cfg->method->save_lmf);
2571
2572                         code = emit_load_volatile_arguments (cfg, code);
2573
2574                         for (i = 0; i < AMD64_NREG; ++i)
2575                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
2576                                         pos -= sizeof (gpointer);
2577                         
2578                         if (pos)
2579                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
2580
2581                         /* Pop registers in reverse order */
2582                         for (i = AMD64_NREG - 1; i > 0; --i)
2583                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
2584                                         amd64_pop_reg (code, i);
2585                                 }
2586
2587                         amd64_leave (code);
2588                         offset = code - cfg->native_code;
2589                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2590                         if (cfg->compile_aot)
2591                                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
2592                         else
2593                                 amd64_set_reg_template (code, AMD64_R11);
2594                         amd64_jump_reg (code, AMD64_R11);
2595                         break;
2596                 }
2597                 case OP_CHECK_THIS:
2598                         /* ensure ins->sreg1 is not NULL: the cmp dereferences it, so a null `this' faults here */
2599                         amd64_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
2600                         break;
2601                 case OP_ARGLIST: {
2602                         amd64_lea_membase (code, AMD64_R11, AMD64_RBP, cfg->sig_cookie);
2603                         amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8);
2604                         break;
2605                 }
2606                 case OP_FCALL:
2607                 case OP_LCALL:
2608                 case OP_VCALL:
2609                 case OP_VOIDCALL:
2610                 case CEE_CALL:
2611                         call = (MonoCallInst*)ins;
2612                         /*
2613                          * The AMD64 ABI forces callers to know about varargs.
2614                          */
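                             /* for varargs, %al must hold an upper bound on the number of
                              * SSE registers used to pass arguments; it is cleared here,
                              * i.e. no FP arguments are passed in registers to this call */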
2615                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
2616                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2617                         else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
2618                                 /* 
2619                                  * Since the unmanaged calling convention doesn't contain a 
2620                                  * 'vararg' entry, we have to treat every pinvoke call as a
2621                                  * potential vararg call.
2622                                  */
2623                                 guint32 nregs, i;
2624                                 nregs = 0;
2625                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
2626                                         if (call->used_fregs & (1 << i))
2627                                                 nregs ++;
2628                                 if (!nregs)
2629                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2630                                 else
2631                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
2632                         }
2633
2634                         if (ins->flags & MONO_INST_HAS_METHOD)
2635                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2636                         else
2637                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2638                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2639                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
2640                         code = emit_move_return_value (cfg, ins, code);
2641                         break;
2642                 case OP_FCALL_REG:
2643                 case OP_LCALL_REG:
2644                 case OP_VCALL_REG:
2645                 case OP_VOIDCALL_REG:
2646                 case OP_CALL_REG:
2647                         call = (MonoCallInst*)ins;
2648
2649                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
2650                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
2651                                 ins->sreg1 = AMD64_R11;
2652                         }
2653
2654                         /*
2655                          * The AMD64 ABI forces callers to know about varargs.
2656                          */
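                             /* %rax doubles as the SSE-argument-count register, so if the
                              * call address currently lives there it has to be moved to
                              * %r11 first */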
2657                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
2658                                 if (ins->sreg1 == AMD64_RAX) {
2659                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
2660                                         ins->sreg1 = AMD64_R11;
2661                                 }
2662                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2663                         }
2664                         amd64_call_reg (code, ins->sreg1);
2665                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2666                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
2667                         code = emit_move_return_value (cfg, ins, code);
2668                         break;
2669                 case OP_FCALL_MEMBASE:
2670                 case OP_LCALL_MEMBASE:
2671                 case OP_VCALL_MEMBASE:
2672                 case OP_VOIDCALL_MEMBASE:
2673                 case OP_CALL_MEMBASE:
2674                         call = (MonoCallInst*)ins;
2675
2676                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
2677                                 /* 
2678                                  * Can't use R11 because it is clobbered by the trampoline 
2679                                  * code, and the reg value is needed by get_vcall_slot_addr.
2680                                  */
2681                                 amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
2682                                 ins->sreg1 = AMD64_RAX;
2683                         }
2684
2685                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
2686                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2687                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
2688                         code = emit_move_return_value (cfg, ins, code);
2689                         break;
2690                 case OP_OUTARG:
2691                 case OP_X86_PUSH:
2692                         amd64_push_reg (code, ins->sreg1);
2693                         break;
2694                 case OP_X86_PUSH_IMM:
2695                         g_assert (amd64_is_imm32 (ins->inst_imm));
2696                         amd64_push_imm (code, ins->inst_imm);
2697                         break;
2698                 case OP_X86_PUSH_MEMBASE:
2699                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
2700                         break;
2701                 case OP_X86_PUSH_OBJ: 
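                             /* push a value type: reserve inst_imm bytes on the stack,
                              * then inline-copy the object into them with rep movs, 8
                              * bytes per iteration (%rcx = inst_imm / 8, %rsi = source,
                              * %rdi = destination); the three scratch registers are
                              * saved and restored around the copy */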
2702                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
2703                         amd64_push_reg (code, AMD64_RDI);
2704                         amd64_push_reg (code, AMD64_RSI);
2705                         amd64_push_reg (code, AMD64_RCX);
2706                         if (ins->inst_offset)
2707                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
2708                         else
2709                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
2710                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
2711                         amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
2712                         amd64_cld (code);
2713                         amd64_prefix (code, X86_REP_PREFIX);
2714                         amd64_movsd (code);
2715                         amd64_pop_reg (code, AMD64_RCX);
2716                         amd64_pop_reg (code, AMD64_RSI);
2717                         amd64_pop_reg (code, AMD64_RDI);
2718                         break;
2719                 case OP_X86_LEA:
2720                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2721                         break;
2722                 case OP_X86_LEA_MEMBASE:
2723                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2724                         break;
2725                 case OP_X86_XCHG:
2726                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2727                         break;
2728                 case OP_LOCALLOC:
2729                         /* keep alignment */
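                             /* i.e. size = (size + align - 1) & ~(align - 1) with
                              * align = MONO_ARCH_FRAME_ALIGNMENT */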
2730                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2731                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2732                         code = mono_emit_stack_alloc (code, ins);
2733                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
2734                         break;
2735                 case CEE_RET:
2736                         amd64_ret (code);
2737                         break;
2738                 case CEE_THROW: {
2739                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
2740                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2741                                              (gpointer)"mono_arch_throw_exception");
2742                         break;
2743                 }
2744                 case OP_RETHROW: {
2745                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
2746                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2747                                              (gpointer)"mono_arch_rethrow_exception");
2748                         break;
2749                 }
2750                 case OP_CALL_HANDLER: 
2751                         /* Align stack */
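                             /* the call below pushes an 8-byte return address, so biasing
                              * %rsp by 8 first should leave the handler running on a
                              * 16-byte aligned stack */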
2752                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
2753                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2754                         amd64_call_imm (code, 0);
2755                         /* Restore stack alignment */
2756                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2757                         break;
2758                 case OP_LABEL:
2759                         ins->inst_c0 = code - cfg->native_code;
2760                         break;
2761                 case CEE_BR:
2762                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2763                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2764                         //break;
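                             /* if the target is already emitted, jump straight to it;
                              * otherwise record patch info and leave a rel8 or rel32
                              * placeholder, preferring rel8 when the estimated distance
                              * fits in a byte */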
2765                         if (ins->flags & MONO_INST_BRLABEL) {
2766                                 if (ins->inst_i0->inst_c0) {
2767                                         amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2768                                 } else {
2769                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2770                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2771                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2772                                                 x86_jump8 (code, 0);
2773                                         else 
2774                                                 x86_jump32 (code, 0);
2775                                 }
2776                         } else {
2777                                 if (ins->inst_target_bb->native_offset) {
2778                                         amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2779                                 } else {
2780                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2781                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2782                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2783                                                 x86_jump8 (code, 0);
2784                                         else 
2785                                                 x86_jump32 (code, 0);
2786                                 } 
2787                         }
2788                         break;
2789                 case OP_BR_REG:
2790                         amd64_jump_reg (code, ins->sreg1);
2791                         break;
2792                 case OP_CEQ:
2793                 case OP_ICEQ:
2794                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2795                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2796                         break;
2797                 case OP_CLT:
2798                 case OP_ICLT:
2799                         amd64_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2800                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2801                         break;
2802                 case OP_CLT_UN:
2803                 case OP_ICLT_UN:
2804                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2805                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2806                         break;
2807                 case OP_CGT:
2808                 case OP_ICGT:
2809                         amd64_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2810                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2811                         break;
2812                 case OP_CGT_UN:
2813                 case OP_ICGT_UN:
2814                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2815                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2816                         break;
2817                 case OP_COND_EXC_EQ:
2818                 case OP_COND_EXC_NE_UN:
2819                 case OP_COND_EXC_LT:
2820                 case OP_COND_EXC_LT_UN:
2821                 case OP_COND_EXC_GT:
2822                 case OP_COND_EXC_GT_UN:
2823                 case OP_COND_EXC_GE:
2824                 case OP_COND_EXC_GE_UN:
2825                 case OP_COND_EXC_LE:
2826                 case OP_COND_EXC_LE_UN:
2827                 case OP_COND_EXC_OV:
2828                 case OP_COND_EXC_NO:
2829                 case OP_COND_EXC_C:
2830                 case OP_COND_EXC_NC:
2831                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2832                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2833                         break;
2834                 case CEE_BEQ:
2835                 case CEE_BNE_UN:
2836                 case CEE_BLT:
2837                 case CEE_BLT_UN:
2838                 case CEE_BGT:
2839                 case CEE_BGT_UN:
2840                 case CEE_BGE:
2841                 case CEE_BGE_UN:
2842                 case CEE_BLE:
2843                 case CEE_BLE_UN:
2844                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2845                         break;
2846
2847                 /* floating point opcodes */
2848                 case OP_R8CONST: {
2849                         double d = *(double *)ins->inst_p0;
2850
2851                         if (use_sse2) {
2852                                 if ((d == 0.0) && (mono_signbit (d) == 0)) {
2853                                         amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
2854                                 }
2855                                 else {
2856                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2857                                         amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
2858                                 }
2859                         }
2860                         else if ((d == 0.0) && (mono_signbit (d) == 0)) {
2861                                 amd64_fldz (code);
2862                         } else if (d == 1.0) {
2863                                 x86_fld1 (code);
2864                         } else {
2865                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2866                                 amd64_fld_membase (code, AMD64_RIP, 0, TRUE);
2867                         }
2868                         break;
2869                 }
2870                 case OP_R4CONST: {
2871                         float f = *(float *)ins->inst_p0;
2872
2873                         if (use_sse2) {
2874                                 if ((f == 0.0) && (mono_signbit (f) == 0)) {
2875                                         amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
2876                                 }
2877                                 else {
2878                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2879                                         amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
2880                                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
2881                                 }
2882                         }
2883                         else if ((f == 0.0) && (mono_signbit (f) == 0)) {
2884                                 amd64_fldz (code);
2885                         } else if (f == 1.0) {
2886                                 x86_fld1 (code);
2887                         } else {
2888                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2889                                 amd64_fld_membase (code, AMD64_RIP, 0, FALSE);
2890                         }
2891                         break;
2892                 }
2893                 case OP_STORER8_MEMBASE_REG:
2894                         if (use_sse2)
2895                                 amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
2896                         else
2897                                 amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2898                         break;
2899                 case OP_LOADR8_SPILL_MEMBASE:
2900                         if (use_sse2)
2901                                 g_assert_not_reached ();
2902                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2903                         amd64_fxch (code, 1);
2904                         break;
2905                 case OP_LOADR8_MEMBASE:
2906                         if (use_sse2)
2907                                 amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2908                         else
2909                                 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2910                         break;
2911                 case OP_STORER4_MEMBASE_REG:
2912                         if (use_sse2) {
2913                                 /* This requires a double->single conversion */
2914                                 amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1);
2915                                 amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15);
2916                         }
2917                         else
2918                                 amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2919                         break;
2920                 case OP_LOADR4_MEMBASE:
2921                         if (use_sse2) {
2922                                 amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2923                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
2924                         }
2925                         else
2926                                 amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2927                         break;
2928                 case CEE_CONV_R4: /* FIXME: change precision */
2929                 case CEE_CONV_R8:
2930                         if (use_sse2)
2931                                 amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2932                         else {
2933                                 amd64_push_reg (code, ins->sreg1);
2934                                 amd64_fild_membase (code, AMD64_RSP, 0, FALSE);
2935                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2936                         }
2937                         break;
2938                 case CEE_CONV_R_UN:
2939                         /* Emulated */
2940                         g_assert_not_reached ();
2941                         break;
2942                 case OP_LCONV_TO_R4: /* FIXME: change precision */
2943                 case OP_LCONV_TO_R8:
2944                         if (use_sse2)
2945                                 amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
2946                         else {
2947                                 amd64_push_reg (code, ins->sreg1);
2948                                 amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
2949                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2950                         }
2951                         break;
2952                 case OP_X86_FP_LOAD_I8:
2953                         if (use_sse2)
2954                                 g_assert_not_reached ();
2955                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2956                         break;
2957                 case OP_X86_FP_LOAD_I4:
2958                         if (use_sse2)
2959                                 g_assert_not_reached ();
2960                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2961                         break;
2962                 case OP_FCONV_TO_I1:
2963                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
2964                         break;
2965                 case OP_FCONV_TO_U1:
2966                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
2967                         break;
2968                 case OP_FCONV_TO_I2:
2969                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
2970                         break;
2971                 case OP_FCONV_TO_U2:
2972                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
2973                         break;
2974                 case OP_FCONV_TO_I4:
2975                 case OP_FCONV_TO_I:
2976                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
2977                         break;
2978                 case OP_FCONV_TO_I8:
2979                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
2980                         break;
2981                 case OP_LCONV_TO_R_UN: { 
2982                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
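                             /* mn is 2^64 encoded as an 80-bit extended double: fild
                              * below reads the operand as a *signed* 64 bit integer, so
                              * when the top bit is set the loaded value is off by
                              * exactly 2^64 and the correction constant is added back */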
2983                         guint8 *br;
2984
2985                         if (use_sse2)
2986                                 g_assert_not_reached ();
2987
2988                         /* load 64bit integer to FP stack */
2989                         amd64_push_imm (code, 0);
2990                         amd64_push_reg (code, ins->sreg2);
2991                         amd64_push_reg (code, ins->sreg1);
2992                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
2993                         /* store as 80bit FP value */
2994                         x86_fst80_membase (code, AMD64_RSP, 0);
2995                         
2996                         /* test if lreg is negative */
2997                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
2998                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2999         
3000                         /* add correction constant mn */
3001                         x86_fld80_mem (code, mn);
3002                         x86_fld80_membase (code, AMD64_RSP, 0);
3003                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3004                         x86_fst80_membase (code, AMD64_RSP, 0);
3005
3006                         amd64_patch (br, code);
3007
3008                         x86_fld80_membase (code, AMD64_RSP, 0);
3009                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 12);
3010
3011                         break;
3012                 }
3013                 case OP_LCONV_TO_OVF_I: {
3014                         guint8 *br [3], *label [1];
3015
3016                         if (use_sse2)
3017                                 g_assert_not_reached ();
3018
3019                         /* 
3020                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3021                          */
3022                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3023
3024                         /* If the low word top bit is set, see if we are negative */
3025                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3026                         /* We are not negative (no top bit set); check that our top word is zero */
3027                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3028                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3029                         label [0] = code;
3030
3031                         /* throw exception */
3032                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3033                         x86_jump32 (code, 0);
3034         
3035                         amd64_patch (br [0], code);
3036                         /* our top bit is set, check that the top word is 0xffffffff */
3037                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3038                 
3039                         amd64_patch (br [1], code);
3040                         /* nope, emit exception */
3041                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3042                         amd64_patch (br [2], label [0]);
3043
3044                         if (ins->dreg != ins->sreg1)
3045                                 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3046                         break;
3047                 }
3048                 case CEE_CONV_OVF_U4:
3049                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
3050                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
3051                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3052                         break;
3053                 case CEE_CONV_OVF_I4_UN:
3054                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
3055                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
3056                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3057                         break;
3058                 case OP_FMOVE:
3059                         if (use_sse2 && (ins->dreg != ins->sreg1))
3060                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
3061                         break;
3062                 case OP_FADD:
3063                         if (use_sse2)
3064                                 amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
3065                         else
3066                                 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3067                         break;
3068                 case OP_FSUB:
3069                         if (use_sse2)
3070                                 amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
3071                         else
3072                                 amd64_fp_op_reg (code, X86_FSUB, 1, TRUE);
3073                         break;          
3074                 case OP_FMUL:
3075                         if (use_sse2)
3076                                 amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
3077                         else
3078                                 amd64_fp_op_reg (code, X86_FMUL, 1, TRUE);
3079                         break;          
3080                 case OP_FDIV:
3081                         if (use_sse2)
3082                                 amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
3083                         else
3084                                 amd64_fp_op_reg (code, X86_FDIV, 1, TRUE);
3085                         break;          
3086                 case OP_FNEG:
3087                         if (use_sse2) {
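                                     /* flip the sign bit with xorpd: the mask
                                      * 0x8000000000000000 is pushed twice so both 8-byte
                                      * lanes of the 16-byte memory operand carry it */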
3088                                 amd64_mov_reg_imm_size (code, AMD64_R11, 0x8000000000000000, 8);
3089                                 amd64_push_reg (code, AMD64_R11);
3090                                 amd64_push_reg (code, AMD64_R11);
3091                                 amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RSP, 0);
3092                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
3093                         }
3094                         else
3095                                 amd64_fchs (code);
3096                         break;          
3097                 case OP_SIN:
3098                         if (use_sse2) {
3099                                 EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
3100                         }
3101                         else {
3102                                 amd64_fsin (code);
3103                                 amd64_fldz (code);
3104                                 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3105                         }
3106                         break;          
3107                 case OP_COS:
3108                         if (use_sse2) {
3109                                 EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
3110                         }
3111                         else {
3112                                 amd64_fcos (code);
3113                                 amd64_fldz (code);
3114                                 amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3115                         }
3116                         break;          
3117                 case OP_ABS:
3118                         if (use_sse2) {
3119                                 EMIT_SSE2_FPFUNC (code, fabs, ins->dreg, ins->sreg1);
3120                         }
3121                         else
3122                                 amd64_fabs (code);
3123                         break;          
3124                 case OP_TAN: {
3125                         /* 
3126                          * it really doesn't make sense to inline all this code;
3127                          * it's here just to show that things may not be as simple 
3128                          * as they appear.
3129                          */
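                             /* fptan sets C2 when the operand is out of range
                              * (|x| >= 2^63); in that case the argument is reduced
                              * modulo 2*pi (fprem1 against pi+pi) and fptan is retried */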
3130                         guchar *check_pos, *end_tan, *pop_jump;
3131                         if (use_sse2)
3132                                 g_assert_not_reached ();
3133                         amd64_push_reg (code, AMD64_RAX);
3134                         amd64_fptan (code);
3135                         amd64_fnstsw (code);
3136                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3137                         check_pos = code;
3138                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3139                         amd64_fstp (code, 0); /* pop the 1.0 */
3140                         end_tan = code;
3141                         x86_jump8 (code, 0);
3142                         amd64_fldpi (code);
3143                         amd64_fp_op (code, X86_FADD, 0);
3144                         amd64_fxch (code, 1);
3145                         x86_fprem1 (code);
3146                         amd64_fstsw (code);
3147                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3148                         pop_jump = code;
3149                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3150                         amd64_fstp (code, 1);
3151                         amd64_fptan (code);
3152                         amd64_patch (pop_jump, code);
3153                         amd64_fstp (code, 0); /* pop the 1.0 */
3154                         amd64_patch (check_pos, code);
3155                         amd64_patch (end_tan, code);
3156                         amd64_fldz (code);
3157                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3158                         amd64_pop_reg (code, AMD64_RAX);
3159                         break;
3160                 }
3161                 case OP_ATAN:
3162                         if (use_sse2)
3163                                 g_assert_not_reached ();
3164                         x86_fld1 (code);
3165                         amd64_fpatan (code);
3166                         amd64_fldz (code);
3167                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3168                         break;          
3169                 case OP_SQRT:
3170                         if (use_sse2) {
3171                                 EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
3172                         }
3173                         else
3174                                 amd64_fsqrt (code);
3175                         break;          
3176                 case OP_X86_FPOP:
3177                         if (!use_sse2)
3178                                 amd64_fstp (code, 0);
3179                         break;          
3180                 case OP_FREM: {
3181                         guint8 *l1, *l2;
3182
3183                         if (use_sse2)
3184                                 g_assert_not_reached ();
3185                         amd64_push_reg (code, AMD64_RAX);
3186                         /* we need to exchange ST(0) with ST(1) */
3187                         amd64_fxch (code, 1);
3188
3189                         /* this requires a loop, because fprem sometimes 
3190                          * returns a partial remainder (with C2 set) */
3191                         l1 = code;
3192                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3193                         /* x86_fprem1 (code); */
3194                         amd64_fprem (code);
3195                         amd64_fnstsw (code);
3196                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2);
3197                         l2 = code + 2;
3198                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3199
3200                         /* pop result */
3201                         amd64_fstp (code, 1);
3202
3203                         amd64_pop_reg (code, AMD64_RAX);
3204                         break;
3205                 }
3206                 case OP_FCOMPARE:
3207                         if (use_sse2) {
3208                                 /* 
3209                                  * The two arguments are swapped because the fbranch instructions
3210                                  * depend on this for the non-sse case to work.
3211                                  */
3212                                 amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3213                                 break;
3214                         }
3215                         if (cfg->opt & MONO_OPT_FCMOV) {
3216                                 amd64_fcomip (code, 1);
3217                                 amd64_fstp (code, 0);
3218                                 break;
3219                         }
3220                         /* this overwrites EAX */
3221                         EMIT_FPCOMPARE(code);
3222                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3223                         break;
3224                 case OP_FCEQ:
3225                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3226                                 /* zeroing the register at the start results in 
3227                                  * shorter and faster code (we can also remove the widening op)
3228                                  */
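                                     /* (setcc only writes the low byte, so pre-zeroing
                                      * the full register stands in for the widening op) */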
3229                                 guchar *unordered_check;
3230                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3231                                 
3232                                 if (use_sse2)
3233                                         amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
3234                                 else {
3235                                         amd64_fcomip (code, 1);
3236                                         amd64_fstp (code, 0);
3237                                 }
3238                                 unordered_check = code;
3239                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3240                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3241                                 amd64_patch (unordered_check, code);
3242                                 break;
3243                         }
3244                         if (ins->dreg != AMD64_RAX) 
3245                                 amd64_push_reg (code, AMD64_RAX);
3246
3247                         EMIT_FPCOMPARE(code);
3248                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3249                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3250                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3251                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3252
3253                         if (ins->dreg != AMD64_RAX) 
3254                                 amd64_pop_reg (code, AMD64_RAX);
3255                         break;
3256                 case OP_FCLT:
3257                 case OP_FCLT_UN:
3258                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3259                                 /* zeroing the register at the start results in 
3260                                  * shorter and faster code (we can also remove the widening op)
3261                                  */
3262                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3263                                 if (use_sse2)
3264                                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3265                                 else {
3266                                         amd64_fcomip (code, 1);
3267                                         amd64_fstp (code, 0);
3268                                 }
3269                                 if (ins->opcode == OP_FCLT_UN) {
3270                                         guchar *unordered_check = code;
3271                                         guchar *jump_to_end;
3272                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3273                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3274                                         jump_to_end = code;
3275                                         x86_jump8 (code, 0);
3276                                         amd64_patch (unordered_check, code);
3277                                         amd64_inc_reg (code, ins->dreg);
3278                                         amd64_patch (jump_to_end, code);
3279                                 } else {
3280                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3281                                 }
3282                                 break;
3283                         }
3284                         if (ins->dreg != AMD64_RAX) 
3285                                 amd64_push_reg (code, AMD64_RAX);
3286
3287                         EMIT_FPCOMPARE(code);
3288                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3289                         if (ins->opcode == OP_FCLT_UN) {
3290                                 guchar *is_not_zero_check, *end_jump;
3291                                 is_not_zero_check = code;
3292                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3293                                 end_jump = code;
3294                                 x86_jump8 (code, 0);
3295                                 amd64_patch (is_not_zero_check, code);
3296                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3297
3298                                 amd64_patch (end_jump, code);
3299                         }
3300                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3301                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3302
3303                         if (ins->dreg != AMD64_RAX) 
3304                                 amd64_pop_reg (code, AMD64_RAX);
3305                         break;
3306                 case OP_FCGT:
3307                 case OP_FCGT_UN:
3308                         if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
3309                                 /* zeroing the register at the start results in 
3310                                  * shorter and faster code (we can also remove the widening op)
3311                                  */
3312                                 guchar *unordered_check;
3313                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3314                                 if (use_sse2)
3315                                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
3316                                 else {
3317                                         amd64_fcomip (code, 1);
3318                                         amd64_fstp (code, 0);
3319                                 }
3320                                 if (ins->opcode == OP_FCGT) {
3321                                         unordered_check = code;
3322                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3323                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3324                                         amd64_patch (unordered_check, code);
3325                                 } else {
3326                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3327                                 }
3328                                 break;
3329                         }
3330                         if (ins->dreg != AMD64_RAX) 
3331                                 amd64_push_reg (code, AMD64_RAX);
3332
3333                         EMIT_FPCOMPARE(code);
3334                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3335                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
3336                         if (ins->opcode == OP_FCGT_UN) {
3337                                 guchar *is_not_zero_check, *end_jump;
3338                                 is_not_zero_check = code;
3339                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3340                                 end_jump = code;
3341                                 x86_jump8 (code, 0);
3342                                 amd64_patch (is_not_zero_check, code);
3343                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3344
3345                                 amd64_patch (end_jump, code);
3346                         }
3347                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3348                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3349
3350                         if (ins->dreg != AMD64_RAX) 
3351                                 amd64_pop_reg (code, AMD64_RAX);
3352                         break;
3353                 case OP_FCLT_MEMBASE:
3354                 case OP_FCGT_MEMBASE:
3355                 case OP_FCLT_UN_MEMBASE:
3356                 case OP_FCGT_UN_MEMBASE:
3357                 case OP_FCEQ_MEMBASE: {
3358                         guchar *unordered_check, *jump_to_end;
3359                         int x86_cond;
3360                         g_assert (use_sse2);
3361
3362                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3363                         amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
3364
3365                         switch (ins->opcode) {
3366                         case OP_FCEQ_MEMBASE:
3367                                 x86_cond = X86_CC_EQ;
3368                                 break;
3369                         case OP_FCLT_MEMBASE:
3370                         case OP_FCLT_UN_MEMBASE:
3371                                 x86_cond = X86_CC_LT;
3372                                 break;
3373                         case OP_FCGT_MEMBASE:
3374                         case OP_FCGT_UN_MEMBASE:
3375                                 x86_cond = X86_CC_GT;
3376                                 break;
3377                         default:
3378                                 g_assert_not_reached ();
3379                         }
3380
			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			amd64_set_reg (code, x86_cond, ins->dreg, FALSE);

			switch (ins->opcode) {
			case OP_FCEQ_MEMBASE:
			case OP_FCLT_MEMBASE:
			case OP_FCGT_MEMBASE:
				amd64_patch (unordered_check, code);
				break;
			case OP_FCLT_UN_MEMBASE:
			case OP_FCGT_UN_MEMBASE:
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
				break;
			default:
				break;
			}
			break;
		}
		case OP_FBEQ:
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				guchar *jump = code;
				x86_branch8 (code, X86_CC_P, 0, TRUE);
				EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
				amd64_patch (jump, code);
				break;
			}
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
			break;
		case OP_FBNE_UN:
			/* Branch if C013 != 100 */
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				/* branch if !ZF or (PF|CF) */
				EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
				break;
			}
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLT:
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBLT_UN:
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			if (ins->opcode == OP_FBLT_UN) {
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				amd64_patch (is_not_zero_check, code);
				amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);

				amd64_patch (end_jump, code);
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGT:
		case OP_FBGT_UN:
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
				break;
			}
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
			if (ins->opcode == OP_FBGT_UN) {
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				amd64_patch (is_not_zero_check, code);
				amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);

				amd64_patch (end_jump, code);
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGE:
			/* Branch if C013 == 100 or 001 */
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if (C0 | C3) = 1 */
				EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
				amd64_patch (br1, code);
				break;
			}
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGE_UN:
			/* Branch if C013 == 000 */
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLE:
			/* Branch if C013=000 or 100 */
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if C0=0 */
				EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
				amd64_patch (br1, code);
				break;
			}
			amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1));
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBLE_UN:
			/* Branch if C013 != 001 */
			if (use_sse2 || (cfg->opt & MONO_OPT_FCMOV)) {
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
				break;
			}
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case CEE_CKFINITE: {
			if (use_sse2) {
				/* Transfer value to the fp stack */
				amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
				amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
				amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
			}
			amd64_push_reg (code, AMD64_RAX);
			amd64_fxam (code);
			amd64_fnstsw (code);
			amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
			amd64_pop_reg (code, AMD64_RAX);
			if (use_sse2) {
				amd64_fstp (code, 0);
			}
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
			if (use_sse2)
				amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
			break;
		}
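		/*
		 * OP_TLS_GET loads a value from the thread-local storage block: on
		 * linux/amd64 the TLS block is addressed through the FS segment
		 * register, so this is a single mov with an FS prefix.
		 */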
		case OP_TLS_GET: {
			x86_prefix (code, X86_FS_PREFIX);
			amd64_mov_reg_mem (code, ins->dreg, ins->inst_offset, 8);
			break;
		}
		case OP_MEMORY_BARRIER: {
			/* Not needed on amd64 */
			break;
		}
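		/*
		 * Atomic add is emitted as a lock-prefixed xadd, which atomically
		 * exchanges the register with the memory operand and then stores the
		 * sum; the register ends up holding the old value of the memory
		 * location. Roughly:
		 *
		 *   mov  dreg, sreg2
		 *   lock xadd [basereg + offset], dreg
		 */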
		case OP_ATOMIC_ADD_I4:
		case OP_ATOMIC_ADD_I8: {
			int dreg = ins->dreg;
			guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;

			if (dreg == ins->inst_basereg)
				dreg = AMD64_R11;

			if (dreg != ins->sreg2)
				amd64_mov_reg_reg (code, dreg, ins->sreg2, size);

			x86_prefix (code, X86_LOCK_PREFIX);
			amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);

			if (dreg != ins->dreg)
				amd64_mov_reg_reg (code, ins->dreg, dreg, size);

			break;
		}
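		/*
		 * The _NEW variants must return the new value instead of the old
		 * one; since xadd only yields the old value, the addend is added
		 * back in after the atomic operation.
		 */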
		case OP_ATOMIC_ADD_NEW_I4:
		case OP_ATOMIC_ADD_NEW_I8: {
			int dreg = ins->dreg;
			guint32 size = (ins->opcode == OP_ATOMIC_ADD_NEW_I4) ? 4 : 8;

			if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
				dreg = AMD64_R11;

			amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
			amd64_prefix (code, X86_LOCK_PREFIX);
			amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
			/* dreg contains the old value, add with sreg2 value */
			amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);

			if (ins->dreg != dreg)
				amd64_mov_reg_reg (code, ins->dreg, dreg, size);

			break;
		}
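		/*
		 * Atomic exchange is built from a cmpxchg retry loop; in outline the
		 * emitted sequence is:
		 *
		 *          mov   rax, [breg + offset]
		 *   retry: lock  cmpxchg [breg + offset], sreg2
		 *          jne   retry   ; on failure cmpxchg reloads rax with the
		 *                        ; current value, so just try again
		 *
		 * rax ends up holding the old value of the memory location.
		 */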
		case OP_ATOMIC_EXCHANGE_I4:
		case OP_ATOMIC_EXCHANGE_I8: {
			guchar *br[2];
			int sreg2 = ins->sreg2;
			int breg = ins->inst_basereg;
			guint32 size = (ins->opcode == OP_ATOMIC_EXCHANGE_I4) ? 4 : 8;

			/*
			 * See http://msdn.microsoft.com/msdnmag/issues/0700/Win32/ for
			 * an explanation of how this works.
			 */

			/* cmpxchg uses eax as the comparand, so we need to make sure we can
			 * use it; hack to overcome limits in the x86 reg allocator
			 * (req: dreg == eax and sreg2 != eax and breg != eax)
			 */
			if (ins->dreg != AMD64_RAX)
				amd64_push_reg (code, AMD64_RAX);

			/* We need the EAX reg for the cmpxchg */
			if (ins->sreg2 == AMD64_RAX) {
				amd64_push_reg (code, AMD64_RDX);
				amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
				sreg2 = AMD64_RDX;
			}

			if (breg == AMD64_RAX) {
				amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, size);
				breg = AMD64_R11;
			}

			amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);

			br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
			amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
			br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
			amd64_patch (br [1], br [0]);

			if (ins->dreg != AMD64_RAX) {
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
				amd64_pop_reg (code, AMD64_RAX);
			}

			if (ins->sreg2 != sreg2)
				amd64_pop_reg (code, AMD64_RDX);

			break;
		}
		default:
			g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
			g_assert_not_reached ();
		}

		if ((code - cfg->native_code - offset) > max_len) {
			g_warning ("wrong maximum instruction length for instruction %s (expected %d, got %ld)",
				   mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
			g_assert_not_reached ();
		}

		cpos += max_len;

		last_ins = ins;
		last_offset = offset;

		ins = ins->next;
	}

	cfg->code_len = code - cfg->native_code;
}

void
mono_arch_register_lowlevel_calls (void)
{
}

void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		unsigned char *ip = patch_info->ip.i + code;
		const unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_NONE:
			continue;
		case MONO_PATCH_INFO_CLASS_INIT: {
			/* Might already have been changed to a nop */
			guint8* ip2 = ip;
			amd64_call_code (ip2, 0);
			break;
		}
		case MONO_PATCH_INFO_METHOD_REL:
		case MONO_PATCH_INFO_R8:
		case MONO_PATCH_INFO_R4:
			g_assert_not_reached ();
			continue;
		case MONO_PATCH_INFO_BB:
			break;
		default:
			break;
		}
		amd64_patch (ip, (gpointer)target);
	}
}

guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i, quad;
	guint8 *code;
	CallInfo *cinfo;

	cfg->code_size = MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
	code = cfg->native_code = g_malloc (cfg->code_size);

	amd64_push_reg (code, AMD64_RBP);
	amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));

	/* Stack alignment check */
#if 0
	{
		amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
		amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
		amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
		x86_branch8 (code, X86_CC_EQ, 2, FALSE);
		amd64_breakpoint (code);
	}
#endif

	alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
	pos = 0;

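	/*
	 * The LMF (Last Managed Frame) records enough state to let the
	 * runtime's stack walker transition from native frames back to managed
	 * ones; methods with save_lmf set allocate one in their frame and link
	 * it into the per-thread LMF list below.
	 */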
	if (method->save_lmf) {
		gint32 lmf_offset;

		pos = ALIGN_TO (pos + sizeof (MonoLMF), 16);

		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, pos);

		lmf_offset = - cfg->arch.lmf_offset;

		/* Save ip */
		amd64_lea_membase (code, AMD64_R11, AMD64_RIP, 0);
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, 8);
		/* Save fp */
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), AMD64_RBP, 8);
		/* Save method */
		/* FIXME: add a relocation for this */
		if (IS_IMM32 (cfg->method))
			amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), (guint64)cfg->method, 8);
		else {
			amd64_mov_reg_imm (code, AMD64_R11, cfg->method);
			amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 8);
		}
		/* Save callee saved regs */
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
	} else {

		for (i = 0; i < AMD64_NREG; ++i)
			if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
				amd64_push_reg (code, i);
				pos += sizeof (gpointer);
			}
	}

	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
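		/*
		 * On systems where the stack grows on demand through a guard page
		 * (win32, or when a SIGSEGV handler runs on an alternate stack),
		 * allocations larger than a page must touch every page in turn so
		 * the fault is taken at each page boundary.
		 */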
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
			amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
#else
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
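	/*
	 * With an upper bound on each block's offset, branches whose target is
	 * known to fit in a signed 8 bit displacement can be emitted in their
	 * short form (see EMIT_COND_BRANCH).
	 */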
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;

				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	sig = mono_method_signature (method);
	pos = 0;

	cinfo = get_call_info (sig, FALSE);

	if (sig->ret->type != MONO_TYPE_VOID) {
		if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
			/* Save volatile arguments to the stack */
			amd64_mov_membase_reg (code, cfg->ret->inst_basereg, cfg->ret->inst_offset, cinfo->ret.reg, 8);
		}
	}

	/* Keep this in sync with emit_load_volatile_arguments */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = cinfo->args + i;
		gint32 stack_offset;
		MonoType *arg_type;
		inst = cfg->varinfo [i];

		if (sig->hasthis && (i == 0))
			arg_type = &mono_defaults.object_class->byval_arg;
		else
			arg_type = sig->params [i - sig->hasthis];

		stack_offset = ainfo->offset + ARGS_OFFSET;

		/* Save volatile arguments to the stack */
		if (inst->opcode != OP_REGVAR) {
			switch (ainfo->storage) {
			case ArgInIReg: {
				guint32 size = 8;

				/* FIXME: I1 etc */
				/*
				if (stack_offset & 0x1)
					size = 1;
				else if (stack_offset & 0x2)
					size = 2;
				else if (stack_offset & 0x4)
					size = 4;
				else
					size = 8;
				*/
				amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg, size);
				break;
			}
			case ArgInFloatSSEReg:
				amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
				break;
			case ArgInDoubleSSEReg:
				amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
				break;
			case ArgValuetypeInReg:
				for (quad = 0; quad < 2; quad ++) {
					switch (ainfo->pair_storage [quad]) {
					case ArgInIReg:
						amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad], sizeof (gpointer));
						break;
					case ArgInFloatSSEReg:
						amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
						break;
					case ArgInDoubleSSEReg:
						amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), ainfo->pair_regs [quad]);
						break;
					case ArgNone:
						break;
					default:
						g_assert_not_reached ();
					}
				}
				break;
			default:
				break;
			}
		}

		if (inst->opcode == OP_REGVAR) {
			/* Argument allocated to (non-volatile) register */
			switch (ainfo->storage) {
			case ArgInIReg:
				amd64_mov_reg_reg (code, inst->dreg, ainfo->reg, 8);
				break;
			case ArgOnStack:
				amd64_mov_reg_membase (code, inst->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	if (method->save_lmf) {
		gint32 lmf_offset;

		if (lmf_tls_offset != -1) {
			/* Load lmf quickly using the FS register */
			x86_prefix (code, X86_FS_PREFIX);
			amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
		}
		else {
			/*
			 * The call might clobber argument registers, but they are already
			 * saved to the stack/global regs.
			 */
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
					  (gpointer)"mono_get_lmf_addr");
		}

		lmf_offset = - cfg->arch.lmf_offset;

		/* Save lmf_addr */
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
		/* Save previous_lmf */
		amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
		amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
		/* Set new lmf */
		amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset);
		amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
	}

	g_free (cinfo);

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);

	return code;
}

void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	int quad, pos, i;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;

	if (cfg->method->save_lmf)
		max_epilog_size += 256;

	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
		max_epilog_size += 50;

	max_epilog_size += (AMD64_NREG * 2);

	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;

	if (method->save_lmf) {
		gint32 lmf_offset = - cfg->arch.lmf_offset;

		/* Restore previous lmf */
		amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
		amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
		amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);

		/* Restore callee saved regs */
		if (cfg->used_int_regs & (1 << AMD64_RBX)) {
			amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
		}
		if (cfg->used_int_regs & (1 << AMD64_R12)) {
			amd64_mov_reg_membase (code, AMD64_R12, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
		}
		if (cfg->used_int_regs & (1 << AMD64_R13)) {
			amd64_mov_reg_membase (code, AMD64_R13, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
		}
		if (cfg->used_int_regs & (1 << AMD64_R14)) {
			amd64_mov_reg_membase (code, AMD64_R14, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
		}
		if (cfg->used_int_regs & (1 << AMD64_R15)) {
			amd64_mov_reg_membase (code, AMD64_R15, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
		}
	} else {

		for (i = 0; i < AMD64_NREG; ++i)
			if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
				pos -= sizeof (gpointer);

		if (pos) {
			if (pos == - sizeof (gpointer)) {
				/* Only one register, so avoid lea */
				for (i = AMD64_NREG - 1; i > 0; --i)
					if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
						amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
					}
			}
			else {
				amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);

				/* Pop registers in reverse order */
				for (i = AMD64_NREG - 1; i > 0; --i)
					if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
						amd64_pop_reg (code, i);
					}
			}
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (mono_method_signature (method), FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		ArgInfo *ainfo = &cinfo->ret;
		MonoInst *inst = cfg->ret;

		for (quad = 0; quad < 2; quad ++) {
			switch (ainfo->pair_storage [quad]) {
			case ArgInIReg:
				amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)), sizeof (gpointer));
				break;
			case ArgInFloatSSEReg:
				amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
				break;
			case ArgInDoubleSSEReg:
				amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof (gpointer)));
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}
	g_free (cinfo);

	amd64_leave (code);
	amd64_ret (code);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);

}

void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			code_size += 40;
		if (patch_info->type == MONO_PATCH_INFO_R8)
			code_size += 8 + 7; /* sizeof (double) + alignment */
		if (patch_info->type == MONO_PATCH_INFO_R4)
			code_size += 4 + 7; /* sizeof (float) + alignment */
	}

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	/* add code to raise exceptions */
	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			amd64_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			//x86_breakpoint (code);
			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				amd64_mov_reg_imm (code, AMD64_RSI, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
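				/*
				 * The IL offset is emitted with a placeholder immediate
				 * first; once the call has been emitted the mov is
				 * rewritten below with the real offset, and any leftover
				 * bytes are padded with nops so the sequence keeps its
				 * length.
				 */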
				buf = code;
				amd64_mov_reg_imm_size (code, AMD64_RSI, 0xf0f0f0f0, 4);
				buf2 = code;

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				amd64_mov_reg_imm (code, AMD64_RDI, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;

				if (cfg->compile_aot) {
					amd64_mov_reg_membase (code, GP_SCRATCH_REG, AMD64_RIP, 0, 8);
					amd64_call_reg (code, GP_SCRATCH_REG);
				} else {
					/* The callee is in memory allocated using the code manager */
					amd64_call_code (code, 0);
				}

				amd64_mov_reg_imm (buf, AMD64_RSI, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	/* Handle relocations with RIP relative addressing */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		gboolean remove = FALSE;

		switch (patch_info->type) {
		case MONO_PATCH_INFO_R8: {
			guint8 *pos;

			code = (guint8*)ALIGN_TO (code, 8);

			pos = cfg->native_code + patch_info->ip.i;

			*(double*)code = *(double*)patch_info->data.target;

			if (use_sse2)
				*(guint32*)(pos + 4) = (guint8*)code - pos - 8;
			else
				*(guint32*)(pos + 3) = (guint8*)code - pos - 7;
			code += 8;

			remove = TRUE;
			break;
		}
		case MONO_PATCH_INFO_R4: {
			guint8 *pos;

			code = (guint8*)ALIGN_TO (code, 8);

			pos = cfg->native_code + patch_info->ip.i;

			*(float*)code = *(float*)patch_info->data.target;

			if (use_sse2)
				*(guint32*)(pos + 4) = (guint8*)code - pos - 8;
			else
				*(guint32*)(pos + 3) = (guint8*)code - pos - 7;
			code += 4;

			remove = TRUE;
			break;
		}
		default:
			break;
		}

		if (remove) {
			if (patch_info == cfg->patch_info)
				cfg->patch_info = patch_info->next;
			else {
				MonoJumpInfo *tmp;

				for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
					;
				tmp->next = patch_info->next;
			}
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);

}

void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;
	CallInfo *cinfo = NULL;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int i, n, stack_area = 0;

	/* Keep this in sync with mono_arch_get_argument_info */

	if (enable_arguments) {
		/* Allocate a new area on the stack and save arguments there */
		sig = mono_method_signature (cfg->method);

		cinfo = get_call_info (sig, FALSE);

		n = sig->param_count + sig->hasthis;

		stack_area = ALIGN_TO (n * 8, 16);

		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);

		for (i = 0; i < n; ++i) {
			inst = cfg->varinfo [i];

			if (inst->opcode == OP_REGVAR)
				amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
			else {
				amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
				amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
			}
		}
	}

	mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
	amd64_set_reg_template (code, AMD64_RDI);
	amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RSP, 8);
	code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);

	if (enable_arguments) {
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);

		g_free (cinfo);
	}

	return code;
}

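/* How the return value must be preserved around the call to the trace function */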
enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_XMM
};

void*
mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;
	int save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;
	int rtype = mono_type_get_underlying_type (mono_method_signature (method)->ret)->type;

	switch (rtype) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
			save_mode = SAVE_EAX;
		else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_XMM;
		break;
	case MONO_TYPE_VALUETYPE:
		save_mode = SAVE_STRUCT;
		break;
	default:
		save_mode = SAVE_EAX;
		break;
	}

	/* Save the result and copy it into the proper argument register */
	switch (save_mode) {
	case SAVE_EAX:
		amd64_push_reg (code, AMD64_RAX);
		/* Align stack */
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
		if (enable_arguments)
			amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RAX, 8);
		break;
	case SAVE_STRUCT:
		/* FIXME: */
		if (enable_arguments)
			amd64_mov_reg_imm (code, AMD64_RSI, 0);
		break;
	case SAVE_XMM:
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
		amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
		/* Align stack */
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
		/*
		 * The result is already in the proper argument register so no copying
		 * needed.
		 */
		break;
	case SAVE_NONE:
		break;
	default:
		g_assert_not_reached ();
	}

	/* Set %al since this is a varargs call */
	if (save_mode == SAVE_XMM)
		amd64_mov_reg_imm (code, AMD64_RAX, 1);
	else
		amd64_mov_reg_imm (code, AMD64_RAX, 0);

	mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
	amd64_set_reg_template (code, AMD64_RDI);
	code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);

	/* Restore result */
	switch (save_mode) {
	case SAVE_EAX:
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
		amd64_pop_reg (code, AMD64_RAX);
		break;
	case SAVE_STRUCT:
		/* FIXME: */
		break;
	case SAVE_XMM:
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
		amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
		break;
	case SAVE_NONE:
		break;
	default:
		g_assert_not_reached ();
	}

	return code;
}

void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* Not needed */
}

void
mono_arch_flush_register_windows (void)
{
}

gboolean
mono_arch_is_inst_imm (gint64 imm)
{
	return amd64_is_imm32 (imm);
}

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))

static int reg_to_ucontext_reg [] = {
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
	REG_RIP
};

/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	ucontext_t *ctx = (ucontext_t*)sigctx;
	guint8* rip;
	int reg;

	rip = (guint8*)ctx->uc_mcontext.gregs [REG_RIP];

	if (IS_REX (rip [0])) {
		reg = amd64_rex_b (rip [0]);
		rip ++;
	}
	else
		reg = 0;

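	/*
	 * idiv raises #DE both for division by zero and for INT_MIN / -1
	 * overflow; the two are told apart by decoding the faulting idiv and
	 * checking whether the divisor register holds -1.
	 */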
	if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
		/* idiv REG */
		reg += x86_modrm_rm (rip [1]);

		if (ctx->uc_mcontext.gregs [reg_to_ucontext_reg [reg]] == -1)
			return TRUE;
	}

	return FALSE;
}

guint32
mono_arch_get_patch_offset (guint8 *code)
{
	return 3;
}

gpointer*
mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
{
	guint32 reg;
	guint32 disp;
	guint8 rex = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
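	/* 7 is the length of the longest call sequence matched below */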
	code -= 7;

	/*
	 * A given byte sequence can match more than one case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [0] == 0x41) && (code [1] == 0xff) && (code [2] == 0x15)) {
		/* call OFFSET(%rip) */
		disp = *(guint32*)(code + 3);
		return (gpointer*)(code + disp + 7);
	}
	else if ((code [1] == 0xff) && (amd64_modrm_reg (code [2]) == 0x2) && (amd64_modrm_mod (code [2]) == 0x2)) {
		/* call *[reg+disp32] */
		if (IS_REX (code [0]))
			rex = code [0];
		reg = amd64_modrm_rm (code [2]);
		disp = *(guint32*)(code + 3);
		//printf ("B: [%%r%d+0x%x]\n", reg, disp);
	}
	else if (code [2] == 0xe8) {
		/* call <ADDR> */
		return NULL;
	}
	else if (IS_REX (code [4]) && (code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x3)) {
		/* call *%reg */
		return NULL;
	}
	else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x1)) {
		/* call *[reg+disp8] */
		if (IS_REX (code [3]))
			rex = code [3];
		reg = amd64_modrm_rm (code [5]);
		disp = *(guint8*)(code + 6);
		//printf ("B: [%%r%d+0x%x]\n", reg, disp);
	}
	else if ((code [5] == 0xff) && (amd64_modrm_reg (code [6]) == 0x2) && (amd64_modrm_mod (code [6]) == 0x0)) {
		/*
		 * This is an interface call: should check that the above code can't catch it earlier
		 * 8b 40 30   mov    0x30(%eax),%eax
		 * ff 10      call   *(%eax)
		 */
		if (IS_REX (code [4]))
			rex = code [4];
		reg = amd64_modrm_rm (code [6]);
		disp = 0;
	}
	else
		g_assert_not_reached ();

	reg += amd64_rex_b (rex);

	/* R11 is clobbered by the trampoline code */
	g_assert (reg != AMD64_R11);

	return (gpointer)(((guint64)(regs [reg])) + disp);
}

gpointer*
mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
{
	guint32 reg;
	guint32 disp;

	code -= 10;

	if (IS_REX (code [0]) && (code [1] == 0x8b) && (code [3] == 0x48) && (code [4] == 0x8b) && (code [5] == 0x40) && (code [7] == 0x48) && (code [8] == 0xff) && (code [9] == 0xd0)) {
		/* mov REG, %rax; mov <OFFSET>(%rax), %rax; call *%rax */
		reg = amd64_rex_b (code [0]) + amd64_modrm_rm (code [2]);
		disp = code [6];

		if (reg == AMD64_RAX)
			return NULL;
		else
			return (gpointer*)(((guint64)(regs [reg])) + disp);
	}

	return NULL;
}

/*
 * Support for fast access to the thread-local lmf structure using the FS
 * segment register on NPTL + kernel 2.6.x.
 */

static gboolean tls_offset_inited = FALSE;

void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		tls_offset_inited = TRUE;

		appdomain_tls_offset = mono_domain_get_tls_offset ();
		lmf_tls_offset = mono_get_lmf_tls_offset ();
		thread_tls_offset = mono_thread_get_tls_offset ();
	}
}

void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}

void
mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
{
	MonoCallInst *call = (MonoCallInst*)inst;
	CallInfo * cinfo = get_call_info (inst->signature, FALSE);

	if (vt_reg != -1) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * The valuetype is in RAX:RDX after the call and needs to be
			 * copied to the stack. Push the address here, so the call
			 * instruction can access it.
			 */
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->sreg1 = vt_reg;
			mono_bblock_add_inst (cfg->cbb, vtarg);

			/* Align stack */
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
		}
		else {
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = vt_reg;
			vtarg->dreg = mono_regstate_next_int (cfg->rs);
			mono_bblock_add_inst (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (call, vtarg->dreg, cinfo->ret.reg, FALSE);
		}
	}

	/* add the this argument */
	if (this_reg != -1) {
		MonoInst *this;
		MONO_INST_NEW (cfg, this, OP_MOVE);
		this->type = this_type;
		this->sreg1 = this_reg;
		this->dreg = mono_regstate_next_int (cfg->rs);
		mono_bblock_add_inst (cfg->cbb, this);

		mono_call_inst_add_outarg_reg (call, this->dreg, cinfo->args [0].reg, FALSE);
	}

	g_free (cinfo);
}

MonoInst*
mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
	MonoInst *ins = NULL;

	if (cmethod->klass == mono_defaults.math_class) {
		if (strcmp (cmethod->name, "Sin") == 0) {
			MONO_INST_NEW (cfg, ins, OP_SIN);
			ins->inst_i0 = args [0];
		} else if (strcmp (cmethod->name, "Cos") == 0) {
			MONO_INST_NEW (cfg, ins, OP_COS);
			ins->inst_i0 = args [0];
		} else if (strcmp (cmethod->name, "Tan") == 0) {
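			/*
			 * Tan and Atan are implemented with the x87 fptan/fpatan
			 * instructions, which have no SSE counterpart, so they are not
			 * intrinsified when using SSE2.
			 */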
			if (use_sse2)
				return ins;
			MONO_INST_NEW (cfg, ins, OP_TAN);
			ins->inst_i0 = args [0];
		} else if (strcmp (cmethod->name, "Atan") == 0) {
			if (use_sse2)
				return ins;
			MONO_INST_NEW (cfg, ins, OP_ATAN);
			ins->inst_i0 = args [0];
		} else if (strcmp (cmethod->name, "Sqrt") == 0) {
			MONO_INST_NEW (cfg, ins, OP_SQRT);
			ins->inst_i0 = args [0];
		} else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
			MONO_INST_NEW (cfg, ins, OP_ABS);
			ins->inst_i0 = args [0];
		}
#if 0
		/* OP_FREM is not IEEE compatible */
		else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
			MONO_INST_NEW (cfg, ins, OP_FREM);
			ins->inst_i0 = args [0];
			ins->inst_i1 = args [1];
		}
#endif
	} else if (cmethod->klass == mono_defaults.thread_class &&
			   strcmp (cmethod->name, "MemoryBarrier") == 0) {
		MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
	} else if (cmethod->klass->image == mono_defaults.corlib &&
			   (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
			   (strcmp (cmethod->klass->name, "Interlocked") == 0)) {

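		/*
		 * Increment and Decrement are lowered to the atomic-add-new opcodes
		 * with a constant +/-1 operand, so the instruction yields the
		 * updated value as Interlocked requires.
		 */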
                if (strcmp (cmethod->name, "Increment") == 0) {
                        MonoInst *ins_iconst;
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_ADD_NEW_I4;
                        else if (fsig->params [0]->type == MONO_TYPE_I8)
                                opcode = OP_ATOMIC_ADD_NEW_I8;
                        else
                                g_assert_not_reached ();
                        MONO_INST_NEW (cfg, ins, opcode);
                        MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
                        ins_iconst->inst_c0 = 1;

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = ins_iconst;
                } else if (strcmp (cmethod->name, "Decrement") == 0) {
                        MonoInst *ins_iconst;
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_ADD_NEW_I4;
                        else if (fsig->params [0]->type == MONO_TYPE_I8)
                                opcode = OP_ATOMIC_ADD_NEW_I8;
                        else
                                g_assert_not_reached ();
                        MONO_INST_NEW (cfg, ins, opcode);
                        MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
                        ins_iconst->inst_c0 = -1;

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = ins_iconst;
                } else if (strcmp (cmethod->name, "Add") == 0) {
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_ADD_I4;
                        else if (fsig->params [0]->type == MONO_TYPE_I8)
                                opcode = OP_ATOMIC_ADD_I8;
                        else
                                g_assert_not_reached ();

                        MONO_INST_NEW (cfg, ins, opcode);

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                } else if (strcmp (cmethod->name, "Exchange") == 0) {
                        guint32 opcode;

                        if (fsig->params [0]->type == MONO_TYPE_I4)
                                opcode = OP_ATOMIC_EXCHANGE_I4;
                        else if ((fsig->params [0]->type == MONO_TYPE_I8) ||
                                         (fsig->params [0]->type == MONO_TYPE_I) ||
                                         (fsig->params [0]->type == MONO_TYPE_OBJECT))
                                /* native ints and object references are 64 bits wide on amd64 */
                                opcode = OP_ATOMIC_EXCHANGE_I8;
                        else
                                return NULL;

                        MONO_INST_NEW (cfg, ins, opcode);

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                } else if (strcmp (cmethod->name, "Read") == 0 && (fsig->params [0]->type == MONO_TYPE_I8)) {
                        /* 64 bit reads are already atomic */
                        MONO_INST_NEW (cfg, ins, CEE_LDIND_I8);
                        ins->inst_i0 = args [0];
                }

                /* 
                 * Can't implement CompareExchange methods this way since they have
                 * three arguments.
                 */
        }

        return ins;
}

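/*
 * mono_arch_print_tree:
 *
 * There are no AMD64 specific opcodes which need custom printing, so
 * report every tree as not handled.
 */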
gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
        return FALSE;
}

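/*
 * mono_arch_get_domain_intrinsic:
 *
 * Return an instruction which loads the current AppDomain directly from
 * TLS, avoiding a call, or NULL if the TLS offset is not known.
 */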
MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
{
        MonoInst* ins;

        if (appdomain_tls_offset == -1)
                return NULL;

        MONO_INST_NEW (cfg, ins, OP_TLS_GET);
        ins->inst_offset = appdomain_tls_offset;
        return ins;
}

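/*
 * mono_arch_get_thread_intrinsic:
 *
 * Same as above, but for the current thread object.
 */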
MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
{
        MonoInst* ins;

        if (thread_tls_offset == -1)
                return NULL;

        MONO_INST_NEW (cfg, ins, OP_TLS_GET);
        ins->inst_offset = thread_tls_offset;
        return ins;
}