/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-amd64.h"
#include "inssel.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

#define SIGNAL_STACK_SIZE (64 * 1024)

#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest are used for local allocation
 */

/*
 * FIXME:
 * - Use xmm registers instead of the x87 stack
 * - Allocate arguments to global registers
 * - implement emulated opcodes
 * - (all archs) do not store trampoline addresses in method->info since they
 *   are domain specific.
 */

#define NOT_IMPLEMENTED g_assert_not_reached ()

const char*
mono_arch_regname (int reg) {
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static inline void
amd64_patch (unsigned char* code, gpointer target)
{
        /* Skip the REX prefix, if present */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        if (code [0] == 0xbb) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else
                x86_patch (code, (unsigned char*)target);
}
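
/*
 * For example (a sketch, not from the original source): the template emitted
 * by amd64_set_reg_template for GP_SCRATCH_REG (%r11) is a REX.WB prefix
 * (0x49) followed by the 0xbb opcode and an 8 byte immediate:
 *
 *   49 bb xx xx xx xx xx xx xx xx    mov $imm64, %r11
 *
 * so after skipping the REX byte, the 0xbb check above identifies the
 * template and the 64 bit immediate is overwritten with the target address.
 * Anything else is treated as an x86-style rel32 patch site and delegated
 * to x86_patch.
 */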

typedef enum {
        ArgInIReg,
        ArgInFloatSSEReg,
        ArgInDoubleSSEReg,
        ArgOnStack,
        ArgValuetypeInReg,
        ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
        gint16 offset;
        gint8  reg;
        ArgStorage storage;

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];
        gint8 pair_regs [2];
} ArgInfo;

typedef struct {
        int nargs;
        guint32 stack_usage;
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
        ArgInfo ret;
        ArgInfo sig_cookie;
        ArgInfo args [1];
} CallInfo;

#define DEBUG(a) if (cfg->verbose_level > 1) a

#define NEW_ICONST(cfg,dest,val) do {   \
                (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
                (dest)->opcode = OP_ICONST;     \
                (dest)->inst_c0 = (val);        \
                (dest)->type = STACK_I4;        \
        } while (0)

#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}

#define FLOAT_PARAM_REGS 8

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                /* A double register */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2;

        switch (type->type) {
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_CHAR:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
                class2 = ARG_CLASS_SSE;
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_VALUETYPE:
                if (type->data.klass->enumtype)
                        class2 = ARG_CLASS_INTEGER;
                else {
                        MonoMarshalType *info = mono_marshal_load_type_info (type->data.klass);
                        int i;

                        /* Accumulate the merged class of all the fields */
                        class2 = ARG_CLASS_NO_CLASS;
                        for (i = 0; i < info->num_fields; ++i)
                                class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                }
                break;
        default:
                g_assert_not_reached ();
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}
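
/*
 * Example (illustrative, not from the original source): classifying
 * struct { int a; float b; } merges ARG_CLASS_INTEGER (from 'a') with
 * ARG_CLASS_SSE (from 'b'); since one of the two classes is INTEGER, the
 * combined class is ARG_CLASS_INTEGER, so the quad containing both fields
 * is passed in a general purpose register, as specified in section 3.2.3
 * of the x86-64 ABI.
 */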

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, quad, nquads, i;
        ArgumentClass args [2];
        MonoMarshalType *info;

        if (sig->pinvoke)
                size = mono_type_native_stack_size (&type->data.klass->byval_arg, NULL);
        else
                size = mono_type_stack_size (&type->data.klass->byval_arg, NULL);

        if (!sig->pinvoke || (size == 0) || (size > 16)) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        /* FIXME: Handle structs smaller than 8 bytes */
        //if ((size % 8) != 0)
        //      NOT_IMPLEMENTED;

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        /*
         * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
         * The X87 and SSEUP stuff is left out since there are no such types in
         * the CLR.
         */
        info = mono_marshal_load_type_info (type->data.klass);
        g_assert (info);
        if (info->native_size > 16) {
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (info->native_size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        /* args [1] remains ARG_CLASS_NO_CLASS when there is only one quad */
        args [0] = args [1] = ARG_CLASS_NO_CLASS;
        for (quad = 0; quad < nquads; ++quad) {
                int size, align;
                ArgumentClass class1;

                class1 = ARG_CLASS_NO_CLASS;
                for (i = 0; i < info->num_fields; ++i) {
                        size = mono_marshal_type_size (info->fields [i].field->type,
                                                       info->fields [i].mspec,
                                                       &align, TRUE, type->data.klass->unicode);
                        if ((info->fields [i].offset < 8) && (info->fields [i].offset + size > 8)) {
                                /* Unaligned field */
                                NOT_IMPLEMENTED;
                        }

                        /* Skip fields in the other quad */
                        if ((quad == 0) && (info->fields [i].offset >= 8))
                                continue;
                        if ((quad == 1) && (info->fields [i].offset < 8))
                                continue;

                        class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
                }
                g_assert (class1 != ARG_CLASS_NO_CLASS);
                args [quad] = class1;
        }

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = ArgOnStack;
                }
        }
}
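
/*
 * Example (illustrative, not from the original source): for a pinvoke
 * signature taking struct { long a; double b; }, the first quad classifies
 * as ARG_CLASS_INTEGER and the second as ARG_CLASS_SSE, so pair_storage
 * becomes { ArgInIReg, ArgInDoubleSSEReg } and the value travels in one
 * general purpose register and one SSE register. If either quad fails to
 * get a register, the assignments are reverted and the whole struct is
 * passed on the stack. Managed (non-pinvoke) signatures always take the
 * ArgOnStack path above.
 */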

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
        guint32 i, gr, fr, simpletype;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;

        cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        gr = 0;
        fr = 0;

        /* return value */
        {
                simpletype = sig->ret->type;
enum_retvalue:
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_STRING:
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_R4:
                        cinfo->ret.storage = ArgInFloatSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_R8:
                        cinfo->ret.storage = ArgInDoubleSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_VALUETYPE: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                        if (sig->ret->data.klass->enumtype) {
                                simpletype = sig->ret->data.klass->enum_basetype->type;
                                goto enum_retvalue;
                        }

                        add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                        if (cinfo->ret.storage == ArgOnStack)
                                /* The caller passes the address where the value is stored */
                                add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                }
                case MONO_TYPE_TYPEDBYREF:
                        /* Same as a valuetype with size 24 */
                        add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                case MONO_TYPE_VOID:
                        break;
                default:
                        g_error ("Can't handle 0x%x as a return value", sig->ret->type);
                }
        }

        /* this */
        if (sig->hasthis)
                add_general (&gr, &stack_size, cinfo->args + 0);

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = 0; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                if (sig->params [i]->byref) {
                        add_general (&gr, &stack_size, ainfo);
                        continue;
                }
                simpletype = sig->params [i]->type;
        enum_calc_size:
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_VALUETYPE:
                        if (sig->params [i]->data.klass->enumtype) {
                                simpletype = sig->params [i]->data.klass->enum_basetype->type;
                                goto enum_calc_size;
                        }

                        add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_TYPEDBYREF:
                        stack_size += sizeof (MonoTypedRef);
                        ainfo->storage = ArgOnStack;
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        if (stack_size & 0x8) {
                /* The AMD64 ABI requires each stack frame to be 16 byte aligned */
                cinfo->need_stack_align = TRUE;
                stack_size += 8;
        }

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}
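
/*
 * Example (illustrative, not from the original source): for a managed
 * signature such as int f (int a, double b, long c), the code above
 * assigns a to %rdi, b to %xmm0, c to %rsi and the return value to %rax;
 * a seventh integer argument would get ArgOnStack and grow stack_usage by
 * sizeof (gpointer).
 */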

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        /* FIXME: */
        return 0;
}

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
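        /* FIXME: not implemented yet; returning 0 reports no feature information */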
        return 0;
}

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));

        mono_amd64_exceptions_init ();
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
        int eax, ebx, ecx, edx;
        guint32 opts = 0;

        /* FIXME: AMD64 */

        *exclude_mask = 0;
        /* Feature Flags function, flags returned in EDX. */
        if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
                if (edx & (1 << 15)) {
                        opts |= MONO_OPT_CMOV;
                        if (edx & 1)
                                opts |= MONO_OPT_FCMOV;
                        else
                                *exclude_mask |= MONO_OPT_FCMOV;
                } else
                        *exclude_mask |= MONO_OPT_CMOV;
        }
        return opts;
}

static gboolean
is_regsize_var (MonoType *t) {
        if (t->byref)
                return TRUE;
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
                return TRUE;
        case MONO_TYPE_VALUETYPE:
                if (t->data.klass->enumtype)
                        return is_regsize_var (t->data.klass->enum_basetype);
                return FALSE;
        }
        return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                /* we don't allocate I1 to registers because there is no simple way to
                 * sign extend 8 bit quantities in caller saved registers on x86 */
                if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
                    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
                    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
        GList *regs = NULL;

        /* We use the callee saved registers for global allocation */
        regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
        regs = g_list_prepend (regs, (gpointer)AMD64_R12);
        regs = g_list_prepend (regs, (gpointer)AMD64_R13);
        regs = g_list_prepend (regs, (gpointer)AMD64_R14);
        regs = g_list_prepend (regs, (gpointer)AMD64_R15);

        return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
        MonoInst *ins = cfg->varinfo [vmv->idx];

        if (cfg->method->save_lmf)
                /* The register is already saved */
                /* subtract 1 for the invisible store in the prolog */
                return (ins->opcode == OP_ARG) ? 0 : 1;
        else
                /* push+pop */
                return (ins->opcode == OP_ARG) ? 1 : 2;
}
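
/*
 * For example, without an LMF the callee saved register has to be pushed
 * in the prolog and popped in the epilog, costing 2 memory references;
 * for an OP_ARG variable the store of the incoming argument to its stack
 * slot is avoided, which offsets one of them, hence the cost of 1.
 */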

void
mono_arch_allocate_vars (MonoCompile *m)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *inst;
        int i, offset, size, align, curinst;
        CallInfo *cinfo;

        header = ((MonoMethodNormal *)m->method)->header;

        sig = m->method->signature;

        cinfo = get_call_info (sig, FALSE);

        /*
         * We use the ABI calling conventions for managed code as well.
         * Exception: valuetypes are never passed or returned in registers.
         */

        /* Locals are allocated backwards from %fp */
        m->frame_reg = AMD64_RBP;
        offset = 0;

        /* Reserve space for callee saved registers */
        for (i = 0; i < AMD64_NREG; ++i)
                if (AMD64_IS_CALLEE_SAVED_REG (i) && (m->used_int_regs & (1 << i))) {
                        offset += sizeof (gpointer);
                }

        if (m->method->save_lmf) {
                /* Reserve stack space for saving LMF + argument regs */
                offset += sizeof (MonoLMF);
                if (lmf_tls_offset == -1)
                        /* Need to save argument regs too */
                        offset += (AMD64_NREG * 8) + (8 * 8);
                m->arch.lmf_offset = offset;
        }

        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        if (((sig->ret->type == MONO_TYPE_VALUETYPE) && !sig->ret->data.klass->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
                                /* The register is volatile */
                                m->ret->opcode = OP_REGOFFSET;
                                m->ret->inst_basereg = AMD64_RBP;
                                offset += 8;
                                m->ret->inst_offset = - offset;
                        }
                        else {
                                m->ret->opcode = OP_REGVAR;
                                m->ret->inst_c0 = cinfo->ret.reg;
                        }
                        break;
                default:
                        g_assert_not_reached ();
                }
                m->ret->dreg = m->ret->inst_c0;
        }

        curinst = m->locals_start;
        for (i = curinst; i < m->num_varinfo; ++i) {
                inst = m->varinfo [i];

                if (inst->opcode == OP_REGVAR) {
                        //g_print ("allocating local %d to %s\n", i, mono_arch_regname (inst->dreg));
                        continue;
                }

                /* inst->unused indicates native sized value types, this is used by the
                 * pinvoke wrappers when they call functions returning structures */
                if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
                        size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
                else
                        size = mono_type_stack_size (inst->inst_vtype, &align);

                /*
                 * variables are accessed as negative offsets from %fp, so increase
                 * the offset before assigning it to a variable
                 */
                offset += size;

                offset += align - 1;
                offset &= ~(align - 1);
                inst->opcode = OP_REGOFFSET;
                inst->inst_basereg = AMD64_RBP;
                inst->inst_offset = - offset;

                //g_print ("allocating local %d to [%s - %d]\n", i, mono_arch_regname (inst->inst_basereg), - inst->inst_offset);
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                m->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                inst = m->varinfo [i];
                if (inst->opcode != OP_REGVAR) {
                        ArgInfo *ainfo = &cinfo->args [i];
                        gboolean inreg = TRUE;
                        MonoType *arg_type;

                        if (sig->hasthis && (i == 0))
                                arg_type = &mono_defaults.object_class->byval_arg;
                        else
                                arg_type = sig->params [i - sig->hasthis];

                        /* FIXME: Allocate volatile arguments to registers */
                        if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
                                inreg = FALSE;

                        /*
                         * Under AMD64, all registers used to pass arguments to functions
                         * are volatile across calls.
                         * FIXME: Optimize this.
                         */
                        if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg))
                                inreg = FALSE;

                        inst->opcode = OP_REGOFFSET;

                        switch (ainfo->storage) {
                        case ArgInIReg:
                        case ArgInFloatSSEReg:
                        case ArgInDoubleSSEReg:
                                inst->opcode = OP_REGVAR;
                                inst->dreg = ainfo->reg;
                                break;
                        case ArgOnStack:
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = AMD64_RBP;
                                inst->inst_offset = ainfo->offset + ARGS_OFFSET;
                                break;
                        default:
                                NOT_IMPLEMENTED;
                        }

                        if (!inreg && (ainfo->storage != ArgOnStack)) {
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = AMD64_RBP;
                                /* These arguments are saved to the stack in the prolog */
                                offset += 8;
                                inst->inst_offset = - offset;
                        }
                }
        }

        m->stack_offset = offset;

        g_free (cinfo);
}

static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage storage, int reg)
{
        switch (storage) {
        case ArgInIReg:
                /*
                 * Since the registers used to pass parameters are volatile,
                 * and they are used in local reg allocation, we store the
                 * arguments to local variables and load them into the
                 * registers when emitting the call opcode.
                 * FIXME: Optimize this.
                 */
                arg->opcode = OP_OUTARG_REG;
                arg->inst_left = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                //arg->ssa_op = MONO_SSA_STORE;
                arg->unused = reg;
                call->used_iregs |= 1 << reg;
                call->out_reg_args = g_slist_append (call->out_reg_args, arg);
                break;
        case ArgInFloatSSEReg:
                /* FIXME: These are volatile as well */
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R4;
                arg->unused = reg;
                break;
        case ArgInDoubleSSEReg:
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R8;
                arg->unused = reg;
                break;
        default:
                g_assert_not_reached ();
        }
}
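
/*
 * In other words (an illustration, not from the original source): an
 * integer argument destined for %rdi is first stored into the fresh local
 * attached as inst_left of the OP_OUTARG_REG instruction, and the code
 * emitted for the call opcode reloads that local into %rdi right before
 * the call, so the local register allocator never has to keep the hard
 * register live across unrelated instructions.
 */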

/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

static int
arg_storage_to_ldind (ArgStorage storage)
{
        switch (storage) {
        case ArgInIReg:
                return CEE_LDIND_I;
        case ArgInDoubleSSEReg:
                return CEE_LDIND_R8;
        case ArgInFloatSSEReg:
                return CEE_LDIND_R4;
        default:
                g_assert_not_reached ();
        }

        return -1;
}

/*
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
        int i, n, stack_size;
        CallInfo *cinfo;
        ArgInfo *ainfo;

        stack_size = 0;

        sig = call->signature;
        n = sig->param_count + sig->hasthis;

        cinfo = get_call_info (sig, sig->pinvoke);

        for (i = 0; i < n; ++i) {
                ainfo = cinfo->args + i;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        MonoMethodSignature *tmp_sig;

                        /* Emit the signature cookie just before the implicit arguments */
                        MonoInst *sig_arg;
                        /* FIXME: Add support for signature tokens to AOT */
                        cfg->disable_aot = TRUE;

                        g_assert (cinfo->sig_cookie.storage == ArgOnStack);

                        /*
                         * mono_ArgIterator_Setup assumes the signature cookie is
                         * passed first and all the arguments which were before it are
                         * passed on the stack after the signature. So compensate by
                         * passing a different signature.
                         */
                        tmp_sig = mono_metadata_signature_dup (call->signature);
                        tmp_sig->param_count -= call->signature->sentinelpos;
                        tmp_sig->sentinelpos = 0;
                        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

                        MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
                        sig_arg->inst_p0 = tmp_sig;

                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        arg->inst_left = sig_arg;
                        arg->type = STACK_PTR;

                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;
                }

                if (is_virtual && i == 0) {
                        /* the argument will be attached to the call instruction */
                        in = call->args [i];
                } else {
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        in = call->args [i];
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
                        arg->type = in->type;
                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;

                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT (sig->params [i - sig->hasthis]))) {
                                gint align;
                                guint32 size;

                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
                                        size = sizeof (MonoTypedRef);
                                        align = sizeof (gpointer);
                                }
                                else if (sig->pinvoke)
                                        size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
                                else
                                        size = mono_type_stack_size (&in->klass->byval_arg, &align);
                                if (ainfo->storage == ArgValuetypeInReg) {
                                        if (ainfo->pair_storage [1] == ArgNone) {
                                                MonoInst *load;

                                                /* Simpler case */

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = in;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0]);
                                                if (arg->opcode == OP_OUTARG_REG)
                                                        arg->inst_right = load;
                                                else
                                                        arg->inst_left = load;
                                        }
                                        else {
                                                /* Trees can't be shared so make a copy */
                                                MonoInst *vtaddr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                                                MonoInst *load, *load2, *offset_ins;

                                                /* Reg1 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];
                                                //load->ssa_op = MONO_SSA_LOAD;

                                                NEW_ICONST (cfg, offset_ins, 0);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = load2;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0]);
                                                if (arg->opcode == OP_OUTARG_REG)
                                                        arg->inst_right = load;
                                                else
                                                        arg->inst_left = load;

                                                /* Reg2 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];
                                                //load->ssa_op = MONO_SSA_LOAD;

                                                NEW_ICONST (cfg, offset_ins, 8);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [1]));
                                                load->inst_left = load2;

                                                MONO_INST_NEW (cfg, arg, OP_OUTARG);
                                                arg->cil_code = in->cil_code;
                                                arg->type = in->type;
                                                /* prepend, so they get reversed */
                                                arg->next = call->out_args;
                                                call->out_args = arg;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1]);
                                                if (arg->opcode == OP_OUTARG_REG)
                                                        arg->inst_right = load;
                                                else
                                                        arg->inst_left = load;

                                                /* Prepend a copy inst */
                                                MONO_INST_NEW (cfg, arg, CEE_STIND_I);
                                                arg->cil_code = in->cil_code;
                                                arg->inst_left = vtaddr;
                                                arg->inst_right = in;
                                                arg->type = in->type;
                                                //arg->ssa_op = MONO_SSA_STORE;
                                                /* prepend, so they get reversed */
                                                arg->next = call->out_args;
                                                call->out_args = arg;
                                        }
                                }
                                else {
                                        arg->opcode = OP_OUTARG_VT;
                                        arg->klass = in->klass;
                                        arg->unused = sig->pinvoke;
                                        arg->inst_imm = size;
                                }
                        }
                        else {
                                switch (ainfo->storage) {
                                case ArgInIReg:
                                        arg->inst_right = in;
                                        add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg);
                                        break;
                                case ArgInFloatSSEReg:
                                case ArgInDoubleSSEReg:
                                        add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg);
                                        break;
                                case ArgOnStack:
                                        arg->opcode = OP_OUTARG;
                                        if (!sig->params [i - sig->hasthis]->byref) {
                                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4)
                                                        arg->opcode = OP_OUTARG_R4;
                                                else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8)
                                                        arg->opcode = OP_OUTARG_R8;
                                        }
                                        break;
                                default:
                                        g_assert_not_reached ();
                                }
                        }
                }
        }

        if (cinfo->need_stack_align) {
                MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
                /* prepend, so they get reversed */
                arg->next = call->out_args;
                call->out_args = arg;
        }

        call->stack_usage = cinfo->stack_usage;
        cfg->param_area = MAX (cfg->param_area, call->stack_usage);
        cfg->flags |= MONO_CFG_HAS_CALLS;

        g_free (cinfo);

        return call;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}

/* emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0)

#define EMIT_FPCOMPARE(code) do { \
        amd64_fcompp (code); \
        amd64_fnstsw (code); \
} while (0)

/*
 * Emitting a call and patching it later is expensive on amd64, so try to
 * determine the patch target immediately, and emit more efficient code if
 * possible.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
        /* FIXME: */
        mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
        amd64_set_reg_template (code, GP_SCRATCH_REG);
        amd64_call_reg (code, GP_SCRATCH_REG);

        return code;
}
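
/*
 * The emitted sequence is a fixed 13 bytes (an illustration, not from the
 * original source), which keeps the patch site easy to locate and rewrite:
 *
 *   49 bb xx xx xx xx xx xx xx xx    mov $target, %r11
 *   41 ff d3                         call *%r11
 */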
1223
1224 #define EMIT_CALL() do { \
1225     amd64_set_reg_template (code, GP_SCRATCH_REG); \
1226     amd64_call_reg (code, GP_SCRATCH_REG); \
1227 } while (0);
1228
1229 /* FIXME: Add more instructions */
1230 #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM))
1231
1232 static void
1233 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1234 {
1235         MonoInst *ins, *last_ins = NULL;
1236         ins = bb->code;
1237
1238         while (ins) {
1239
1240                 switch (ins->opcode) {
1241                 case OP_ICONST:
1242                         /* reg = 0 -> XOR (reg, reg) */
1243                         /* XOR sets cflags on x86, so we cant do it always */
1244                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1245                                 ins->opcode = CEE_XOR;
1246                                 ins->sreg1 = ins->dreg;
1247                                 ins->sreg2 = ins->dreg;
1248                         }
1249                         break;
1250                 case OP_MUL_IMM: 
1251                         /* remove unnecessary multiplication with 1 */
1252                         if (ins->inst_imm == 1) {
1253                                 if (ins->dreg != ins->sreg1) {
1254                                         ins->opcode = OP_MOVE;
1255                                 } else {
1256                                         last_ins->next = ins->next;
1257                                         ins = ins->next;
1258                                         continue;
1259                                 }
1260                         }
1261                         break;
1262                 case OP_COMPARE_IMM:
1263                         /* OP_COMPARE_IMM (reg, 0) 
1264                          * --> 
1265                          * OP_AMD64_TEST_NULL (reg) 
1266                          */
1267                         if (!ins->inst_imm)
1268                                 ins->opcode = OP_X86_TEST_NULL;
1269                         break;
1270                 case OP_X86_COMPARE_MEMBASE_IMM:
1271                         /* 
1272                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1273                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1274                          * -->
1275                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1276                          * OP_COMPARE_IMM reg, imm
1277                          *
1278                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1279                          */
1280                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1281                             ins->inst_basereg == last_ins->inst_destbasereg &&
1282                             ins->inst_offset == last_ins->inst_offset) {
1283                                         ins->opcode = OP_COMPARE_IMM;
1284                                         ins->sreg1 = last_ins->sreg1;
1285
1286                                         /* check if we can remove cmp reg,0 with test null */
1287                                         if (!ins->inst_imm)
1288                                                 ins->opcode = OP_X86_TEST_NULL;
1289                                 }
1290
1291                         break;
1292                 case OP_LOAD_MEMBASE:
1293                 case OP_LOADI4_MEMBASE:
1294                         /* 
1295                          * Note: if reg1 = reg2 the load op is removed
1296                          *
1297                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1298                          * OP_LOAD_MEMBASE offset(basereg), reg2
1299                          * -->
1300                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1301                          * OP_MOVE reg1, reg2
1302                          */
1303                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1304                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1305                             ins->inst_basereg == last_ins->inst_destbasereg &&
1306                             ins->inst_offset == last_ins->inst_offset) {
1307                                 if (ins->dreg == last_ins->sreg1) {
1308                                         last_ins->next = ins->next;                             
1309                                         ins = ins->next;                                
1310                                         continue;
1311                                 } else {
1312                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1313                                         ins->opcode = OP_MOVE;
1314                                         ins->sreg1 = last_ins->sreg1;
1315                                 }
1316
1317                         /* 
1318                          * Note: reg1 must be different from the basereg in the second load
1319                          * Note: if reg1 == reg2, the second load is removed
1320                          *
1321                          * OP_LOAD_MEMBASE offset(basereg), reg1
1322                          * OP_LOAD_MEMBASE offset(basereg), reg2
1323                          * -->
1324                          * OP_LOAD_MEMBASE offset(basereg), reg1
1325                          * OP_MOVE reg1, reg2
1326                          */
1327                         } else if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1328                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1329                               ins->inst_basereg != last_ins->dreg &&
1330                               ins->inst_basereg == last_ins->inst_basereg &&
1331                               ins->inst_offset == last_ins->inst_offset) {
1332
1333                                 if (ins->dreg == last_ins->dreg) {
1334                                         last_ins->next = ins->next;                             
1335                                         ins = ins->next;                                
1336                                         continue;
1337                                 } else {
1338                                         ins->opcode = OP_MOVE;
1339                                         ins->sreg1 = last_ins->dreg;
1340                                 }
1341
1342                                 //g_assert_not_reached ();
1343
1344 #if 0
1345                         /* 
1346                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1347                          * OP_LOAD_MEMBASE offset(basereg), reg
1348                          * -->
1349                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1350                          * OP_ICONST reg, imm
1351                          */
1352                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1353                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1354                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1355                                    ins->inst_offset == last_ins->inst_offset) {
1356                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1357                                 ins->opcode = OP_ICONST;
1358                                 ins->inst_c0 = last_ins->inst_imm;
1359                                 g_assert_not_reached (); // check this rule
1360 #endif
1361                         }
1362                         break;
1363                 case OP_LOADU1_MEMBASE:
1364                 case OP_LOADI1_MEMBASE:
1365                         /* 
1366                          * Note: if reg1 == reg2, the load op is removed
1367                          *
1368                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1369                          * OP_LOAD_MEMBASE offset(basereg), reg2
1370                          * -->
1371                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1372                          * OP_MOVE reg1, reg2
1373                          */
1374                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1375                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1376                                         ins->inst_offset == last_ins->inst_offset) {
1377                                 if (ins->dreg == last_ins->sreg1) {
1378                                         last_ins->next = ins->next;                             
1379                                         ins = ins->next;                                
1380                                         continue;
1381                                 } else {
1382                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1383                                         ins->opcode = OP_MOVE;
1384                                         ins->sreg1 = last_ins->sreg1;
1385                                 }
1386                         }
1387                         break;
1388                 case OP_LOADU2_MEMBASE:
1389                 case OP_LOADI2_MEMBASE:
1390                         /* 
1391                          * Note: if reg1 == reg2, the load op is removed
1392                          *
1393                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1394                          * OP_LOAD_MEMBASE offset(basereg), reg2
1395                          * -->
1396                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1397                          * OP_MOVE reg1, reg2
1398                          */
1399                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1400                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1401                                         ins->inst_offset == last_ins->inst_offset) {
1402                                 if (ins->dreg == last_ins->sreg1) {
1403                                         last_ins->next = ins->next;                             
1404                                         ins = ins->next;                                
1405                                         continue;
1406                                 } else {
1407                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1408                                         ins->opcode = OP_MOVE;
1409                                         ins->sreg1 = last_ins->sreg1;
1410                                 }
1411                         }
1412                         break;
1413                 case CEE_CONV_I4:
1414                 case CEE_CONV_U4:
1415                 case OP_MOVE:
1416                         /*
1417                          * Removes:
1418                          *
1419                          * OP_MOVE reg, reg 
1420                          */
1421                         if (ins->dreg == ins->sreg1) {
1422                                 if (last_ins)
1423                                         last_ins->next = ins->next;                             
1424                                 ins = ins->next;
1425                                 continue;
1426                         }
1427                         /* 
1428                          * Removes:
1429                          *
1430                          * OP_MOVE sreg, dreg 
1431                          * OP_MOVE dreg, sreg
1432                          */
1433                         if (last_ins && last_ins->opcode == OP_MOVE &&
1434                             ins->sreg1 == last_ins->dreg &&
1435                             ins->dreg == last_ins->sreg1) {
1436                                 last_ins->next = ins->next;                             
1437                                 ins = ins->next;                                
1438                                 continue;
1439                         }
1440                         break;
1441                 }
1442                 last_ins = ins;
1443                 ins = ins->next;
1444         }
1445         bb->last_ins = last_ins;
1446 }
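
/*
 * Illustrative sketch (not from the original source): the kind of rewrite
 * the store/load rule above performs, on a hypothetical instruction stream:
 *
 *   OP_STOREI4_MEMBASE_REG %rax, 8(%rbp)
 *   OP_LOADI4_MEMBASE      8(%rbp), R10     ; same base reg and offset
 * -->
 *   OP_STOREI4_MEMBASE_REG %rax, 8(%rbp)
 *   OP_MOVE                %rax -> R10      ; the load becomes a move
 *
 * If R10 happened to be the register just stored, the load would instead be
 * deleted outright, as in the dreg == last_ins->sreg1 branch above.
 */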
1447
1448 static const int 
1449 branch_cc_table [] = {
1450         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1451         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1452         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1453 };
1454
1455 static int
1456 opcode_to_x86_cond (int opcode)
1457 {
1458         switch (opcode) {
1459         case OP_IBEQ:
1460                 return X86_CC_EQ;
1461         case OP_IBNE_UN:
1462                 return X86_CC_NE;
1463         case OP_IBLT:
1464                 return X86_CC_LT;
1465         case OP_IBLT_UN:
1466                 return X86_CC_LT;
1467         case OP_IBGT:
1468                 return X86_CC_GT;
1469         case OP_IBGT_UN:
1470                 return X86_CC_GT;
1471         case OP_IBGE:
1472                 return X86_CC_GE;
1473         case OP_IBGE_UN:
1474                 return X86_CC_GE;
1475         case OP_IBLE:
1476                 return X86_CC_LE;
1477         case OP_IBLE_UN:
1478                 return X86_CC_LE;
1479         case OP_COND_EXC_IOV:
1480                 return X86_CC_O;
1481         case OP_COND_EXC_IC:
1482                 return X86_CC_C;
1483         default:
1484                 g_assert_not_reached ();
1485         }
1486
1487         return -1;
1488 }
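
/*
 * Example: opcode_to_x86_cond (OP_IBGT) == X86_CC_GT. Note that the signed
 * and unsigned variants map to the same condition code here; presumably the
 * signedness is applied separately when the condition is emitted (as with
 * branch_cc_table above, which repeats the same codes for both rows).
 */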
1489
1490 /*
1491  * returns the offset used by spillvar. It allocates a new
1492  * spill variable if necessary. 
1493  */
1494 static int
1495 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1496 {
1497         MonoSpillInfo **si, *info;
1498         int i = 0;
1499
1500         si = &cfg->spill_info; 
1501         
1502         while (i <= spillvar) {
1503
1504                 if (!*si) {
1505                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1506                         info->next = NULL;
1507                         cfg->stack_offset += sizeof (gpointer);
1508                         info->offset = - cfg->stack_offset;
1509                 }
1510
1511                 if (i == spillvar)
1512                         return (*si)->offset;
1513
1514                 i++;
1515                 si = &(*si)->next;
1516         }
1517
1518         g_assert_not_reached ();
1519         return 0;
1520 }
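
/*
 * Worked example (assuming cfg->stack_offset starts at 0 and
 * sizeof (gpointer) == 8): the first call for spillvar 0 allocates a slot
 * at offset -8, spillvar 1 then gets -16, and so on; repeated calls for the
 * same spillvar return the cached offset from the spill_info list.
 */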
1521
1522 /*
1523  * returns the offset used by spillvar. It allocates a new
1524  * spill float variable if necessary. 
1525  * (same as mono_spillvar_offset but for float)
1526  */
1527 static int
1528 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1529 {
1530         MonoSpillInfo **si, *info;
1531         int i = 0;
1532
1533         si = &cfg->spill_info_float; 
1534         
1535         while (i <= spillvar) {
1536
1537                 if (!*si) {
1538                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1539                         info->next = NULL;
1540                         cfg->stack_offset += sizeof (double);
1541                         info->offset = - cfg->stack_offset;
1542                 }
1543
1544                 if (i == spillvar)
1545                         return (*si)->offset;
1546
1547                 i++;
1548                 si = &(*si)->next;
1549         }
1550
1551         g_assert_not_reached ();
1552         return 0;
1553 }
1554
1555 /*
1556  * Creates a store for spilled floating point items
1557  */
1558 static MonoInst*
1559 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1560 {
1561         MonoInst *store;
1562         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1563         store->sreg1 = reg;
1564         store->inst_destbasereg = AMD64_RBP;
1565         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1566
1567         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08lx(%%rbp)) (from %d)\n", spill, (long)store->inst_offset, reg));
1568         return store;
1569 }
1570
1571 /*
1572  * Creates a load for spilled floating point items 
1573  */
1574 static MonoInst*
1575 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1576 {
1577         MonoInst *load;
1578         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1579         load->dreg = reg;
1580         load->inst_basereg = AMD64_RBP;
1581         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1582
1583         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08lx(%%rbp)) (from %d)\n", spill, (long)load->inst_offset, reg));
1584         return load;
1585 }
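
/*
 * Sketch of how the two helpers above pair up (see the 'f' dreg/sreg1
 * handling in mono_arch_local_regalloc below): when the fp stack depth
 * exceeds MONO_MAX_FREGS, a value is written out with
 * create_spilled_store_float and brought back at its next use with
 * create_spilled_load_float, both addressed off %rbp.
 */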
1586
1587 #define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && AMD64_IS_CALLEE_REG ((r)))
1588
1589 typedef struct {
1590         int born_in;
1591         int killed_in;
1592         int last_use;
1593         int prev_use;
1594         int flags;              /* used to track fp spill/load */
1595 } RegTrack;
1596
1597 static const char*const * ins_spec = amd64_desc;
1598
1599 static void
1600 print_ins (int i, MonoInst *ins)
1601 {
1602         const char *spec = ins_spec [ins->opcode];
1603         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1604         if (!spec)
1605                 g_error ("Unknown opcode: %s\n", mono_inst_name (ins->opcode));
1606         if (spec [MONO_INST_DEST]) {
1607                 if (ins->dreg >= MONO_MAX_IREGS)
1608                         g_print (" R%d <-", ins->dreg);
1609                 else
1610                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1611         }
1612         if (spec [MONO_INST_SRC1]) {
1613                 if (ins->sreg1 >= MONO_MAX_IREGS)
1614                         g_print (" R%d", ins->sreg1);
1615                 else
1616                         g_print (" %s", mono_arch_regname (ins->sreg1));
1617         }
1618         if (spec [MONO_INST_SRC2]) {
1619                 if (ins->sreg2 >= MONO_MAX_IREGS)
1620                         g_print (" R%d", ins->sreg2);
1621                 else
1622                         g_print (" %s", mono_arch_regname (ins->sreg2));
1623         }
1624         if (spec [MONO_INST_CLOB])
1625                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1626         g_print ("\n");
1627 }
1628
1629 static void
1630 print_regtrack (RegTrack *t, int num)
1631 {
1632         int i;
1633         char buf [32];
1634         const char *r;
1635         
1636         for (i = 0; i < num; ++i) {
1637                 if (!t [i].born_in)
1638                         continue;
1639                 if (i >= MONO_MAX_IREGS) {
1640                         g_snprintf (buf, sizeof(buf), "R%d", i);
1641                         r = buf;
1642                 } else
1643                         r = mono_arch_regname (i);
1644                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1645         }
1646 }
1647
1648 typedef struct InstList InstList;
1649
1650 struct InstList {
1651         InstList *prev;
1652         InstList *next;
1653         MonoInst *data;
1654 };
1655
1656 static inline InstList*
1657 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1658 {
1659         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1660         item->data = data;
1661         item->prev = NULL;
1662         item->next = list;
1663         if (list)
1664                 list->prev = item;
1665         return item;
1666 }
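
/*
 * Usage sketch: mono_arch_local_regalloc below builds its reversed view of
 * the basic block with
 *
 *   reversed = inst_list_prepend (cfg->mempool, reversed, ins);
 *
 * so walking 'reversed' visits the instructions back to front.
 */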
1667
1668 /*
1669  * Force the spilling of the variable in the symbolic register 'reg'.
1670  */
1671 static int
1672 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1673 {
1674         MonoInst *load;
1675         int i, sel, spill;
1676         
1677         sel = cfg->rs->iassign [reg];
1678         /*i = cfg->rs->isymbolic [sel];
1679         g_assert (i == reg);*/
1680         i = reg;
1681         spill = ++cfg->spill_count;
1682         cfg->rs->iassign [i] = -spill - 1;
1683         mono_regstate_free_int (cfg->rs, sel);
1684         /* we need to create a spill var and insert a load to sel after the current instruction */
1685         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1686         load->dreg = sel;
1687         load->inst_basereg = AMD64_RBP;
1688         load->inst_offset = mono_spillvar_offset (cfg, spill);
1689         if (item->prev) {
1690                 while (ins->next != item->prev->data)
1691                         ins = ins->next;
1692         }
1693         load->next = ins->next;
1694         ins->next = load;
1695         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08lx(%%rbp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
1696         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1697         g_assert (i == sel);
1698
1699         return sel;
1700 }
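
#if 0
/*
 * Illustrative only (dead code, not part of the backend): the iassign
 * encoding used above. A value < -1 means "spilled"; the spill slot is
 * recovered exactly as the allocator below does with 'spill = -val - 1'.
 */
static int
decode_spill_slot (int iassign_val)
{
	g_assert (iassign_val < -1);
	return -iassign_val - 1;
}
#endif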
1701
1702 static int
1703 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1704 {
1705         MonoInst *load;
1706         int i, sel, spill;
1707
1708         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1709         /* exclude the registers in the current instruction */
1710         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1711                 if (ins->sreg1 >= MONO_MAX_IREGS)
1712                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1713                 else
1714                         regmask &= ~ (1 << ins->sreg1);
1715                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1716         }
1717         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1718                 if (ins->sreg2 >= MONO_MAX_IREGS)
1719                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1720                 else
1721                         regmask &= ~ (1 << ins->sreg2);
1722                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1723         }
1724         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1725                 regmask &= ~ (1 << ins->dreg);
1726                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1727         }
1728
1729         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1730         g_assert (regmask); /* need at least a register we can free */
1731         sel = -1;
1732         /* we should track prev_use and spill the register whose next use is farthest away */
1733         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1734                 if (regmask & (1 << i)) {
1735                         sel = i;
1736                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1737                         break;
1738                 }
1739         }
1740         i = cfg->rs->isymbolic [sel];
1741         spill = ++cfg->spill_count;
1742         cfg->rs->iassign [i] = -spill - 1;
1743         mono_regstate_free_int (cfg->rs, sel);
1744         /* we need to create a spill var and insert a load to sel after the current instruction */
1745         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1746         load->dreg = sel;
1747         load->inst_basereg = AMD64_RBP;
1748         load->inst_offset = mono_spillvar_offset (cfg, spill);
1749         if (item->prev) {
1750                 while (ins->next != item->prev->data)
1751                         ins = ins->next;
1752         }
1753         load->next = ins->next;
1754         ins->next = load;
1755         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08lx(%%rbp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
1756         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1757         g_assert (i == sel);
1758         
1759         return sel;
1760 }
1761
1762 static MonoInst*
1763 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1764 {
1765         MonoInst *copy;
1766         MONO_INST_NEW (cfg, copy, OP_MOVE);
1767         copy->dreg = dest;
1768         copy->sreg1 = src;
1769         if (ins) {
1770                 copy->next = ins->next;
1771                 ins->next = copy;
1772         }
1773         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1774         return copy;
1775 }
1776
1777 static MonoInst*
1778 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1779 {
1780         MonoInst *store;
1781         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1782         store->sreg1 = reg;
1783         store->inst_destbasereg = AMD64_RBP;
1784         store->inst_offset = mono_spillvar_offset (cfg, spill);
1785         if (ins) {
1786                 store->next = ins->next;
1787                 ins->next = store;
1788         }
1789         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08lx(%%rbp)) R%d (from %s)\n", spill, (long)store->inst_offset, prev_reg, mono_arch_regname (reg)));
1790         return store;
1791 }
1792
1793 static void
1794 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1795 {
1796         MonoInst *prev;
1797         if (item->next) {
1798                 prev = item->next->data;
1799
1800                 while (prev->next != ins)
1801                         prev = prev->next;
1802                 to_insert->next = ins;
1803                 prev->next = to_insert;
1804         } else {
1805                 to_insert->next = ins;
1806         }
1807         /* 
1808          * Needed: otherwise, when processing the next instruction, an ins
1809          * appended to the end would slip past this instruction.
1810          */
1811         item->data = to_insert; 
1812 }
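
/*
 * Sketch of the list surgery performed above, with 'prev' being the
 * instruction found immediately in front of 'ins':
 *
 *   before:  ... -> prev -> ins -> ...
 *   after:   ... -> prev -> to_insert -> ins -> ...
 *
 * (in the no-predecessor case only to_insert->next = ins is set, and the
 * caller's item->data keeps the new instruction reachable).
 */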
1813
1814
1815 #if  0
1816 static int
1817 alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
1818 {
1819         int val = cfg->rs->iassign [sym_reg];
1820         if (val < 0) {
1821                 int spill = 0;
1822                 if (val < -1) {
1823                         /* the register gets spilled after this inst */
1824                         spill = -val -1;
1825                 }
1826                 val = mono_regstate_alloc_int (cfg->rs, allow_mask);
1827                 if (val < 0)
1828                         val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
1829                 cfg->rs->iassign [sym_reg] = val;
1830                 /* add option to store before the instruction for src registers */
1831                 if (spill)
1832                         create_spilled_store (cfg, spill, val, sym_reg, ins);
1833         }
1834         cfg->rs->isymbolic [val] = sym_reg;
1835         return val;
1836 }
1837 #endif
1838
1839 /* flags used in reginfo->flags */
1840 enum {
1841         MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
1842         MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,
1843         MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,
1844         MONO_X86_REG_NOT_ECX                    = 1 << 3,
1845         MONO_X86_REG_EAX                                = 1 << 4,
1846         MONO_X86_REG_EDX                                = 1 << 5,
1847         MONO_X86_REG_ECX                                = 1 << 6
1848 };
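
/*
 * Example of how these flags are used below: a 64 bit ('L') destination
 * pins its two sequential virtual registers with MONO_X86_REG_EAX and
 * MONO_X86_REG_EDX, and a shift ('s' clobber) marks sreg1 with
 * MONO_X86_REG_NOT_ECX and sreg2 with MONO_X86_REG_ECX, so that
 * mono_amd64_alloc_int_reg can narrow its allocation mask accordingly.
 */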
1849
1850 static int
1851 mono_amd64_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1852 {
1853         int val;
1854         int test_mask = dest_mask;
1855
1856         if (flags & MONO_X86_REG_EAX)
1857                 test_mask &= (1 << AMD64_RAX);
1858         else if (flags & MONO_X86_REG_EDX)
1859                 test_mask &= (1 << AMD64_RDX);
1860         else if (flags & MONO_X86_REG_ECX)
1861                 test_mask &= (1 << AMD64_RCX);
1862         else if (flags & MONO_X86_REG_NOT_ECX)
1863                 test_mask &= ~ (1 << AMD64_RCX);
1864
1865         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1866         if (val >= 0 && test_mask != dest_mask)
1867                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1868
1869         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1870                 DEBUG(g_print ("\tFailed to allocate from flag-suggested mask (%u), retrying excluding ECX\n", test_mask));
1871                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & ~(1 << AMD64_RCX)));
1872         }
1873
1874         if (val < 0) {
1875                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1876                 if (val < 0)
1877                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1878         }
1879
1880         return val;
1881 }
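
/*
 * Usage sketch (hypothetical values): with dest_mask == AMD64_CALLEE_REGS
 * and flags containing MONO_X86_REG_ECX, the first allocation attempt is
 * restricted to %rcx alone; if that fails, the call falls back to the full
 * dest_mask and, as a last resort, to get_register_spilling.
 */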
1882
1883
1884 /*#include "cprop.c"*/
1885
1886 /*
1887  * Local register allocation.
1888  * We first scan the list of instructions and we save the liveness info of
1889  * each register (when the register is first used, when its value is set etc.).
1890  * We also reverse the list of instructions (in the InstList list) because assigning
1891  * registers backwards allows for more tricks to be used.
1892  */
1893 void
1894 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1895 {
1896         MonoInst *ins;
1897         MonoRegState *rs = cfg->rs;
1898         int i, val, fpcount;
1899         RegTrack *reginfo, *reginfof;
1900         RegTrack *reginfo1, *reginfo2, *reginfod;
1901         InstList *tmp, *reversed = NULL;
1902         const char *spec;
1903         guint32 src1_mask, src2_mask, dest_mask;
1904         GList *fspill_list = NULL;
1905         int fspill = 0;
1906
1907         if (!bb->code)
1908                 return;
1909         rs->next_vireg = bb->max_ireg;
1910         rs->next_vfreg = bb->max_freg;
1911         mono_regstate_assign (rs);
1912         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1913         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1914         rs->ifree_mask = AMD64_CALLEE_REGS;
1915
1916         ins = bb->code;
1917
1918         /*if (cfg->opt & MONO_OPT_COPYPROP)
1919                 local_copy_prop (cfg, ins);*/
1920
1921         i = 1;
1922         fpcount = 0;
1923         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1924         /* forward pass on the instructions to collect register liveness info */
1925         while (ins) {
1926                 spec = ins_spec [ins->opcode];
1927                 
1928                 DEBUG (print_ins (i, ins));
1929
1930                 if (spec [MONO_INST_SRC1]) {
1931                         if (spec [MONO_INST_SRC1] == 'f') {
1932                                 GList *spill;
1933                                 reginfo1 = reginfof;
1934
1935                                 spill = g_list_first (fspill_list);
1936                                 if (spill && fpcount < MONO_MAX_FREGS) {
1937                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1938                                         fspill_list = g_list_remove (fspill_list, spill->data);
1939                                 } else
1940                                         fpcount--;
1941                         }
1942                         else
1943                                 reginfo1 = reginfo;
1944                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1945                         reginfo1 [ins->sreg1].last_use = i;
1946                         if (spec [MONO_INST_SRC1] == 'L') {
1947                                 /* The virtual register is allocated sequentially */
1948                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1949                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1950                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1951                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1952
1953                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1954                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1955                         }
1956                 } else {
1957                         ins->sreg1 = -1;
1958                 }
1959                 if (spec [MONO_INST_SRC2]) {
1960                         if (spec [MONO_INST_SRC2] == 'f') {
1961                                 GList *spill;
1962                                 reginfo2 = reginfof;
1963                                 spill = g_list_first (fspill_list);
1964                                 if (spill) {
1965                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1966                                         fspill_list = g_list_remove (fspill_list, spill->data);
1967                                         if (fpcount >= MONO_MAX_FREGS) {
1968                                                 fspill++;
1969                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1970                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1971                                         }
1972                                 } else
1973                                         fpcount--;
1974                         }
1975                         else
1976                                 reginfo2 = reginfo;
1977                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1978                         reginfo2 [ins->sreg2].last_use = i;
1979                         if (spec [MONO_INST_SRC2] == 'L') {
1980                                 /* The virtual register is allocated sequentially */
1981                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1982                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1983                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1984                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1985                         }
1986                         if (spec [MONO_INST_CLOB] == 's') {
1987                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1988                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1989                         }
1990                 } else {
1991                         ins->sreg2 = -1;
1992                 }
1993                 if (spec [MONO_INST_DEST]) {
1994                         if (spec [MONO_INST_DEST] == 'f') {
1995                                 reginfod = reginfof;
1996                                 if (fpcount >= MONO_MAX_FREGS) {
1997                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1998                                         fspill++;
1999                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2000                                         fpcount--;
2001                                 }
2002                                 fpcount++;
2003                         }
2004                         else
2005                                 reginfod = reginfo;
2006                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
2007                                 reginfod [ins->dreg].killed_in = i;
2008                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
2009                         reginfod [ins->dreg].last_use = i;
2010                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
2011                                 reginfod [ins->dreg].born_in = i;
2012                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
2013                                 /* The virtual register is allocated sequentially */
2014                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
2015                                 reginfod [ins->dreg + 1].last_use = i;
2016                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
2017                                         reginfod [ins->dreg + 1].born_in = i;
2018
2019                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
2020                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
2021                         }
2022                 } else {
2023                         ins->dreg = -1;
2024                 }
2025
2026                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
2027                 ++i;
2028                 ins = ins->next;
2029         }
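
	/*
	 * At this point reginfo/reginfof record, per virtual register, the
	 * instruction indexes where it is born, killed and last used, and
	 * 'reversed' holds the block's instructions back to front for the
	 * backwards allocation pass below.
	 */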
2030
2031         // todo: check if we have anything left on fp stack, in verify mode?
2032         fspill = 0;
2033
2034         DEBUG (print_regtrack (reginfo, rs->next_vireg));
2035         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2036         tmp = reversed;
2037         while (tmp) {
2038                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2039                 dest_mask = src1_mask = src2_mask = AMD64_CALLEE_REGS;
2040                 --i;
2041                 ins = tmp->data;
2042                 spec = ins_spec [ins->opcode];
2043                 prev_dreg = -1;
2044                 clob_dreg = -1;
2045                 DEBUG (g_print ("processing:"));
2046                 DEBUG (print_ins (i, ins));
2047                 if (spec [MONO_INST_CLOB] == 's') {
2048                         if (rs->ifree_mask & (1 << AMD64_RCX)) {
2049                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2050                                 rs->iassign [ins->sreg2] = AMD64_RCX;
2051                                 rs->isymbolic [AMD64_RCX] = ins->sreg2;
2052                                 ins->sreg2 = AMD64_RCX;
2053                                 rs->ifree_mask &= ~ (1 << AMD64_RCX);
2054                         } else {
2055                                 int need_ecx_spill = TRUE;
2056                                 /* 
2057                                  * we first check if src1/dreg is already assigned a register
2058                                  * and then we force a spill of the var assigned to ECX.
2059                                  */
2060                                 /* the destination register can't be ECX */
2061                                 dest_mask &= ~ (1 << AMD64_RCX);
2062                                 src1_mask &= ~ (1 << AMD64_RCX);
2063                                 val = rs->iassign [ins->dreg];
2064                                 /* 
2065                                  * the destination register is already assigned to ECX:
2066                                  * we need to allocate another register for it and then
2067                                  * copy from this to ECX.
2068                                  */
2069                                 if (val == AMD64_RCX && ins->dreg != ins->sreg2) {
2070                                         int new_dest;
2071                                         new_dest = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2072                                         g_assert (new_dest >= 0);
2073                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2074
2075                                         rs->isymbolic [new_dest] = ins->dreg;
2076                                         rs->iassign [ins->dreg] = new_dest;
2077                                         clob_dreg = ins->dreg;
2078                                         ins->dreg = new_dest;
2079                                         create_copy_ins (cfg, AMD64_RCX, new_dest, ins);
2080                                         need_ecx_spill = FALSE;
2081                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2082                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2083                                         rs->iassign [ins->dreg] = val;
2084                                         rs->isymbolic [val] = prev_dreg;
2085                                         ins->dreg = val;*/
2086                                 }
2087                                 val = rs->iassign [ins->sreg1];
2088                                 if (val == AMD64_RCX) {
2089                                         g_assert_not_reached ();
2090                                 } else if (val >= 0) {
2091                                         /* 
2092                                          * the first src reg was already assigned to a register,
2093                                          * we need to copy it to the dest register because the 
2094                                          * shift instruction clobbers the first operand.
2095                                          */
2096                                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, val, NULL);
2097                                         DEBUG (g_print ("\tclob:s moved sreg1 from R%d to R%d\n", val, ins->dreg));
2098                                         insert_before_ins (ins, tmp, copy);
2099                                 }
2100                                 val = rs->iassign [ins->sreg2];
2101                                 if (val >= 0 && val != AMD64_RCX) {
2102                                         MonoInst *move = create_copy_ins (cfg, AMD64_RCX, val, NULL);
2103                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2104                                         move->next = ins;
2105                                         g_assert_not_reached ();
2106                                         /* FIXME: where is move connected to the instruction list? */
2107                                         //tmp->prev->data->next = move;
2108                                 }
2109                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << AMD64_RCX))) {
2110                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RCX]));
2111                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RCX]);
2112                                         mono_regstate_free_int (rs, AMD64_RCX);
2113                                 }
2114                                 /* force-set sreg2 */
2115                                 rs->iassign [ins->sreg2] = AMD64_RCX;
2116                                 rs->isymbolic [AMD64_RCX] = ins->sreg2;
2117                                 ins->sreg2 = AMD64_RCX;
2118                                 rs->ifree_mask &= ~ (1 << AMD64_RCX);
2119                         }
2120                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
2121                         int dest_reg = AMD64_RAX;
2122                         int clob_reg = AMD64_RDX;
2123                         if (spec [MONO_INST_DEST] == 'd') {
2124                                 dest_reg = AMD64_RDX; /* remainder */
2125                                 clob_reg = AMD64_RAX;
2126                         }
2127                         val = rs->iassign [ins->dreg];
2128                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2129                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2130                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2131                                 mono_regstate_free_int (rs, dest_reg);
2132                         }
2133                         if (val < 0) {
2134                                 if (val < -1) {
2135                                         /* the register gets spilled after this inst */
2136                                         int spill = -val -1;
2137                                         dest_mask = 1 << clob_reg;
2138                                         prev_dreg = ins->dreg;
2139                                         val = mono_regstate_alloc_int (rs, dest_mask);
2140                                         if (val < 0)
2141                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2142                                         rs->iassign [ins->dreg] = val;
2143                                         if (spill)
2144                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2145                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2146                                         rs->isymbolic [val] = prev_dreg;
2147                                         ins->dreg = val;
2148                                         if (val != dest_reg) { /* force a copy */
2149                                                 create_copy_ins (cfg, val, dest_reg, ins);
2150                                         }
2151                                 } else {
2152                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2153                                         prev_dreg = ins->dreg;
2154                                         rs->iassign [ins->dreg] = dest_reg;
2155                                         rs->isymbolic [dest_reg] = ins->dreg;
2156                                         ins->dreg = dest_reg;
2157                                         rs->ifree_mask &= ~ (1 << dest_reg);
2158                                 }
2159                         } else {
2160                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2161                                 if (val != dest_reg) { /* force a copy */
2162                                         create_copy_ins (cfg, val, dest_reg, ins);
2163                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2164                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2165                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2166                                                 mono_regstate_free_int (rs, dest_reg);
2167                                         }
2168                                 }
2169                         }
2170                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2171                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2172                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2173                                 mono_regstate_free_int (rs, clob_reg);
2174                         }
2175                         src1_mask = 1 << AMD64_RAX;
2176                         src2_mask = 1 << AMD64_RCX;
2177                 }
2178                 if (spec [MONO_INST_DEST] == 'l') {
2179                         int hreg;
2180                         val = rs->iassign [ins->dreg];
2181                         /* check special case when dreg has been moved from ecx (clob shift) */
2182                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2183                                 hreg = clob_dreg + 1;
2184                         else
2185                                 hreg = ins->dreg + 1;
2186
2187                         /* base prev_dreg on fixed hreg, handle clob case */
2188                         val = hreg - 1;
2189
2190                         if (val != rs->isymbolic [AMD64_RAX] && !(rs->ifree_mask & (1 << AMD64_RAX))) {
2191                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
2192                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
2193                                 mono_regstate_free_int (rs, AMD64_RAX);
2194                         }
2195                         if (hreg != rs->isymbolic [AMD64_RDX] && !(rs->ifree_mask & (1 << AMD64_RDX))) {
2196                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [AMD64_RDX]));
2197                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RDX]);
2198                                 mono_regstate_free_int (rs, AMD64_RDX);
2199                         }
2200                 }
2201
2202                 /* Track dreg */
2203                 if (spec [MONO_INST_DEST] == 'f') {
2204                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2205                                 GList *spill_node;
2206                                 MonoInst *store;
2207                                 spill_node = g_list_first (fspill_list);
2208                                 g_assert (spill_node);
2209
2210                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2211                                 insert_before_ins (ins, tmp, store);
2212                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2213                                 fspill--;
2214                         }
2215                 } else if (spec [MONO_INST_DEST] == 'L') {
2216                         int hreg;
2217                         val = rs->iassign [ins->dreg];
2218                         /* check special case when dreg has been moved from ecx (clob shift) */
2219                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2220                                 hreg = clob_dreg + 1;
2221                         else
2222                                 hreg = ins->dreg + 1;
2223
2224                         /* base prev_dreg on fixed hreg, handle clob case */
2225                         prev_dreg = hreg - 1;
2226
2227                         if (val < 0) {
2228                                 int spill = 0;
2229                                 if (val < -1) {
2230                                         /* the register gets spilled after this inst */
2231                                         spill = -val -1;
2232                                 }
2233                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2234                                 rs->iassign [ins->dreg] = val;
2235                                 if (spill)
2236                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2237                         }
2238
2239                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2240  
2241                         rs->isymbolic [val] = hreg - 1;
2242                         ins->dreg = val;
2243                         
2244                         val = rs->iassign [hreg];
2245                         if (val < 0) {
2246                                 int spill = 0;
2247                                 if (val < -1) {
2248                                         /* the register gets spilled after this inst */
2249                                         spill = -val -1;
2250                                 }
2251                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2252                                 rs->iassign [hreg] = val;
2253                                 if (spill)
2254                                         create_spilled_store (cfg, spill, val, hreg, ins);
2255                         }
2256
2257                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2258                         rs->isymbolic [val] = hreg;
2259                         /* save the allocated high reg into ins->unused */
2260                         ins->unused = val;
2261
2262                         /* check if we can free our long reg */
2263                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2264                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2265                                 mono_regstate_free_int (rs, val);
2266                         }
2267                 }
2268                 else if (ins->dreg >= MONO_MAX_IREGS) {
2269                         int hreg;
2270                         val = rs->iassign [ins->dreg];
2271                         if (spec [MONO_INST_DEST] == 'l') {
2272                                 /* check special case when dreg has been moved from ecx (clob shift) */
2273                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2274                                         hreg = clob_dreg + 1;
2275                                 else
2276                                         hreg = ins->dreg + 1;
2277
2278                                 /* base prev_dreg on fixed hreg, handle clob case */
2279                                 prev_dreg = hreg - 1;
2280                         } else
2281                                 prev_dreg = ins->dreg;
2282
2283                         if (val < 0) {
2284                                 int spill = 0;
2285                                 if (val < -1) {
2286                                         /* the register gets spilled after this inst */
2287                                         spill = -val -1;
2288                                 }
2289                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2290                                 rs->iassign [ins->dreg] = val;
2291                                 if (spill)
2292                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2293                         }
2294                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2295                         rs->isymbolic [val] = prev_dreg;
2296                         ins->dreg = val;
2297                         /* handle cases where lreg needs to be eax:edx */
2298                         if (spec [MONO_INST_DEST] == 'l') {
2299                                 /* check special case when dreg has been moved from ecx (clob shift) */
2300                                 int hreg = prev_dreg + 1;
2301                                 val = rs->iassign [hreg];
2302                                 if (val < 0) {
2303                                         int spill = 0;
2304                                         if (val < -1) {
2305                                                 /* the register gets spilled after this inst */
2306                                                 spill = -val -1;
2307                                         }
2308                                         val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2309                                         rs->iassign [hreg] = val;
2310                                         if (spill)
2311                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2312                                 }
2313                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2314                                 rs->isymbolic [val] = hreg;
2315                                 if (ins->dreg == AMD64_RAX) {
2316                                         if (val != AMD64_RDX)
2317                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2318                                 } else if (ins->dreg == AMD64_RDX) {
2319                                         if (val == AMD64_RAX) {
2320                                                 /* swap */
2321                                                 g_assert_not_reached ();
2322                                         } else {
2323                                                 /* two forced copies */
2324                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2325                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2326                                         }
2327                                 } else {
2328                                         if (val == AMD64_RDX) {
2329                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2330                                         } else {
2331                                                 /* two forced copies */
2332                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2333                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2334                                         }
2335                                 }
2336                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2337                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2338                                         mono_regstate_free_int (rs, val);
2339                                 }
2340                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != AMD64_RAX && spec [MONO_INST_CLOB] != 'd') {
2341                                 /* this instruction only outputs to EAX, need to copy */
2342                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2343                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != AMD64_RDX && spec [MONO_INST_CLOB] != 'd') {
2344                                 create_copy_ins (cfg, ins->dreg, AMD64_RDX, ins);
2345                         }
2346                 }
2347                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2348                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2349                         mono_regstate_free_int (rs, ins->dreg);
2350                 }
2351                 /* put src1 in EAX if it needs to be */
2352                 if (spec [MONO_INST_SRC1] == 'a') {
2353                         if (!(rs->ifree_mask & (1 << AMD64_RAX))) {
2354                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
2355                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
2356                                 mono_regstate_free_int (rs, AMD64_RAX);
2357                         }
2358                         /* force-set sreg1 */
2359                         rs->iassign [ins->sreg1] = AMD64_RAX;
2360                         rs->isymbolic [AMD64_RAX] = ins->sreg1;
2361                         ins->sreg1 = AMD64_RAX;
2362                         rs->ifree_mask &= ~ (1 << AMD64_RAX);
2363                 }
2364
2365                 /* Track sreg1 */
2366                 if (spec [MONO_INST_SRC1] == 'f') {
2367                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2368                                 MonoInst *load;
2369                                 MonoInst *store = NULL;
2370
2371                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2372                                         GList *spill_node;
2373                                         spill_node = g_list_first (fspill_list);
2374                                         g_assert (spill_node);
2375
2376                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2377                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2378                                 }
2379
2380                                 fspill++;
2381                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2382                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2383                                 insert_before_ins (ins, tmp, load);
2384                                 if (store) 
2385                                         insert_before_ins (load, tmp, store);
2386                         }
2387                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2388                         /* force the source to be the same as the dest */
2389                         rs->iassign [ins->sreg1] = ins->dreg;
2390                         rs->iassign [ins->sreg1 + 1] = ins->unused;
2391
2392                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2393                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2394
2395                         ins->sreg1 = ins->dreg;
2396                         /* 
2397                          * No need to save the reg, we know that src1=dest in this case
2398                          * ins->inst_c0 = ins->unused;
2399                          */
2400
2401                         /* make sure that we remove them from the free mask */
2402                         rs->ifree_mask &= ~ (1 << ins->dreg);
2403                         rs->ifree_mask &= ~ (1 << ins->unused);
2404                 }
2405                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2406                         val = rs->iassign [ins->sreg1];
2407                         prev_sreg1 = ins->sreg1;
2408                         if (val < 0) {
2409                                 int spill = 0;
2410                                 if (val < -1) {
2411                                         /* the register gets spilled after this inst (the spill slot is -val - 1) */
2412                                         spill = -val -1;
2413                                 }
2414                                 if (0 && ins->opcode == OP_MOVE) {
2415                                         /* 
2416                                          * small optimization: the dest register is already allocated
2417                                          * but the src one is not: we can simply assign the same register
2418                                          * here and peephole will get rid of the instruction later.
2419                                          * This optimization may interfere with the clobbering handling:
2420                                          * it removes a mov operation that will be added again to handle clobbering.
2421                                          * There are also some other issues that show up with make testjit.
2422                                          */
2423                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2424                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2425                                         //g_assert (val >= 0);
2426                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2427                                 } else {
2428                                         //g_assert (val == -1); /* source cannot be spilled */
2429                                         val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2430                                         rs->iassign [ins->sreg1] = val;
2431                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2432                                 }
2433                                 if (spill) {
2434                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2435                                         insert_before_ins (ins, tmp, store);
2436                                 }
2437                         }
2438                         rs->isymbolic [val] = prev_sreg1;
2439                         ins->sreg1 = val;
2440                 } else {
2441                         prev_sreg1 = -1;
2442                 }
2443                 /* handle clobbering of sreg1 */
2444                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2445                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2446                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2447                         if (ins->sreg2 == -1 || spec [MONO_INST_CLOB] == 's') {
2448                                 /* note: the copy is inserted before the current instruction! */
2449                                 insert_before_ins (ins, tmp, copy);
2450                                 /* we set sreg1 to dest as well */
2451                                 prev_sreg1 = ins->sreg1 = ins->dreg;
2452                         } else {
2453                                 /* inserted after the operation */
2454                                 copy->next = ins->next;
2455                                 ins->next = copy;
2456                         }
2457                 }
2458                 /* track sreg2 */
2459                 if (spec [MONO_INST_SRC2] == 'f') {
2460                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2461                                 MonoInst *load;
2462                                 MonoInst *store = NULL;
2463
2464                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2465                                         GList *spill_node;
2466
2467                                         spill_node = g_list_first (fspill_list);
2468                                         g_assert (spill_node);
2469                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2470                                                 spill_node = g_list_next (spill_node);
2471         
2472                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2473                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2474                                 } 
2475                                 
2476                                 fspill++;
2477                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2478                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2479                                 insert_before_ins (ins, tmp, load);
2480                                 if (store) 
2481                                         insert_before_ins (load, tmp, store);
2482                         }
2483                 } 
2484                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2485                         val = rs->iassign [ins->sreg2];
2486                         prev_sreg2 = ins->sreg2;
2487                         if (val < 0) {
2488                                 int spill = 0;
2489                                 if (val < -1) {
2490                                         /* the register gets spilled after this inst */
2491                                         spill = -val -1;
2492                                 }
2493                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2494                                 rs->iassign [ins->sreg2] = val;
2495                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2496                                 if (spill)
2497                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2498                         }
2499                         rs->isymbolic [val] = prev_sreg2;
2500                         ins->sreg2 = val;
2501                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != AMD64_RCX) {
2502                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but RCX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [AMD64_RCX]));
2503                         }
2504                 } else {
2505                         prev_sreg2 = -1;
2506                 }
2507
2508                 if (spec [MONO_INST_CLOB] == 'c') {
2509                         int j, s;
2510                         guint32 clob_mask = AMD64_CALLEE_REGS;
2511                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2512                                 s = 1 << j;
2513                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2514                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2515                                 }
2516                         }
2517                 }
2518                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2519                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2520                         mono_regstate_free_int (rs, ins->sreg1);
2521                 }
2522                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2523                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2524                         mono_regstate_free_int (rs, ins->sreg2);
2525                 }*/
2526         
2527                 //DEBUG (print_ins (i, ins));
2528                 /* this may result from an insert_before call */
2529                 if (!tmp->next)
2530                         bb->code = tmp->data;
2531                 tmp = tmp->next;
2532         }
2533
2534         g_free (reginfo);
2535         g_free (reginfof);
2536         g_list_free (fspill_list);
2537 }
2538
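/*
 * emit_float_to_int:
 *
 *   Convert the value on top of the x87 stack to a SIZE byte integer in
 * DREG. C semantics require truncation, so the FPU rounding control is
 * temporarily switched to round-toward-zero (RC bits 0xc00 in the control
 * word) around the fistp, then the original control word is restored.
 * Roughly, the emitted sequence is:
 *
 *   sub    $8, %rsp
 *   fnstcw (%rsp)         ; save the control word
 *   or     $0xc00, ...    ; build a truncating copy at 2(%rsp)
 *   fldcw  2(%rsp)
 *   fistp  ...            ; pop the value as an integer
 *   fldcw  (%rsp)         ; restore the original control word
 *   add    $8, %rsp
 */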
2539 static unsigned char*
2540 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2541 {
2542         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
2543         x86_fnstcw_membase(code, AMD64_RSP, 0);
2544         amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2);
2545         amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2546         amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2);
2547         amd64_fldcw_membase (code, AMD64_RSP, 2);
2548         amd64_push_reg (code, AMD64_RAX); // SP = SP - 8
2549         amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8);
2550         amd64_pop_reg (code, dreg);
2551         amd64_fldcw_membase (code, AMD64_RSP, 0);
2552         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2553
2554         if (size == 1)
2555                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
2556         else if (size == 2)
2557                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
2558         return code;
2559 }
2560
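/*
 * mono_emit_stack_alloc:
 *
 *   Emit code for localloc: subtract the size in TREE->SREG1 from %rsp
 * (page by page under Windows, where the stack has to be touched as it
 * grows) and, if MONO_INST_INIT is set, zero the freshly allocated block.
 */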
2561 static unsigned char*
2562 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2563 {
2564         int sreg = tree->sreg1;
2565 #ifdef PLATFORM_WIN32
2566         guint8* br[5];
2567
2568         NOT_IMPLEMENTED;
2569
2570         /*
2571          * Under Windows:
2572          * If the requested stack size is larger than one page,
2573          * perform a stack-touch operation.
2574          */
2575         /*
2576          * Generate stack probe code.
2577          * Under Windows, it is necessary to allocate one page at a time,
2578          * "touching" the stack after each successful sub-allocation. This is
2579          * because of the way stack growth is implemented - there is a
2580          * guard page before the lowest stack page that is currently committed.
2581          * The stack normally grows sequentially, so the OS traps access to the
2582          * guard page and commits more pages when needed.
2583          */
2584         amd64_test_reg_imm (code, sreg, ~0xFFF);
2585         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2586
2587         br[2] = code; /* loop */
2588         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
2589         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
2590         amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2591         amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2592         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2593         amd64_patch (br[3], br[2]);
2594         amd64_test_reg_reg (code, sreg, sreg);
2595         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2596         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2597
2598         br[1] = code; x86_jump8 (code, 0);
2599
2600         amd64_patch (br[0], code);
2601         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2602         amd64_patch (br[1], code);
2603         amd64_patch (br[4], code);
2604 #else /* PLATFORM_WIN32 */
2605         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
2606 #endif
2607         if (tree->flags & MONO_INST_INIT) {
2608                 int offset = 0;
2609                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
2610                         amd64_push_reg (code, AMD64_RAX);
2611                         offset += 8;
2612                 }
2613                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
2614                         amd64_push_reg (code, AMD64_RCX);
2615                         offset += 8;
2616                 }
2617                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
2618                         amd64_push_reg (code, AMD64_RDI);
2619                         offset += 8;
2620                 }
2621                 
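		/* zero the block: %rcx = count, %rax = 0, %rdi = start, rep stos */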
2622                 amd64_shift_reg_imm (code, X86_SHR, sreg, 4);
2623                 if (sreg != AMD64_RCX)
2624                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
2625                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2626                                 
2627                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
2628                 amd64_cld (code);
2629                 amd64_prefix (code, X86_REP_PREFIX);
2630                 amd64_stosl (code);
2631                 
2632                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
2633                         amd64_pop_reg (code, AMD64_RDI);
2634                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
2635                         amd64_pop_reg (code, AMD64_RCX);
2636                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
2637                         amd64_pop_reg (code, AMD64_RAX);
2638         }
2639         return code;
2640 }
2641
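/*
 * emit_move_return_value:
 *
 *   Move the return value of a call from its fixed location (%rax for
 * integers, %xmm0 for floats, a register pair for small valuetypes) to
 * the destination expected by INS.
 */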
2642 static guint8*
2643 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2644 {
2645         CallInfo *cinfo;
2646         guint32 offset, quad;
2647
2648         /* Move return value to the target register */
2649         /* FIXME: do this in the local reg allocator */
2650         switch (ins->opcode) {
2651         case CEE_CALL:
2652         case OP_CALL_REG:
2653         case OP_CALL_MEMBASE:
2654         case OP_LCALL:
2655         case OP_LCALL_REG:
2656         case OP_LCALL_MEMBASE:
2657                 if (ins->dreg != AMD64_RAX)
2658                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, 8);
2659                 break;
2660         case OP_FCALL:
2661         case OP_FCALL_REG:
2662         case OP_FCALL_MEMBASE:
2663                 /* FIXME: optimize this */
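		/*
		 * The FP return value arrives in %xmm0, but this backend still
		 * computes with the x87 stack, so bounce it through a spill slot.
		 */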
2664                 offset = mono_spillvar_offset_float (cfg, 0);
2665                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
2666                         amd64_movss_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2667                         amd64_fld_membase (code, AMD64_RBP, offset, FALSE);
2668                 }
2669                 else {
2670                         amd64_movsd_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2671                         amd64_fld_membase (code, AMD64_RBP, offset, TRUE);
2672                 }
2673                 break;
2674         case OP_VCALL:
2675         case OP_VCALL_REG:
2676         case OP_VCALL_MEMBASE:
2677                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2678                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2679                         /* Pop the destination address from the stack */
2680                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2681                         amd64_pop_reg (code, AMD64_RCX);
2682                         
2683                         for (quad = 0; quad < 2; quad ++) {
2684                                 switch (cinfo->ret.pair_storage [quad]) {
2685                                 case ArgInIReg:
2686                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
2687                                         break;
2688                                 case ArgInFloatSSEReg:
2689                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2690                                         break;
2691                                 case ArgInDoubleSSEReg:
2692                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2693                                         break;
2694                                 case ArgNone:
2695                                         break;
2696                                 default:
2697                                         NOT_IMPLEMENTED;
2698                                 }
2699                         }
2700                 }
2701                 break;
2702         }
2703
2704         return code;
2705 }
2706
2707 /*
2708  * emit_load_volatile_arguments:
2709  *
2710  *  Load volatile arguments from the stack to the original input registers.
2711  * Required before a tail call.
2712  */
2713 static guint8*
2714 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2715 {
2716         MonoMethod *method = cfg->method;
2717         MonoMethodSignature *sig;
2718         MonoInst *inst;
2719         CallInfo *cinfo;
2720         guint32 i;
2721
2722         /* FIXME: Generate intermediate code instead */
2723
2724         sig = method->signature;
2725
2726         cinfo = get_call_info (sig, FALSE);
2727         
2728         /* This is the opposite of the code in emit_prolog */
2729
2730         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2731                 ArgInfo *ainfo = cinfo->args + i;
2732                 MonoType *arg_type;
2733                 inst = cfg->varinfo [i];
2734
2735                 if (sig->hasthis && (i == 0))
2736                         arg_type = &mono_defaults.object_class->byval_arg;
2737                 else
2738                         arg_type = sig->params [i - sig->hasthis];
2739
2740                 if (inst->opcode != OP_REGVAR) {
2741                         switch (ainfo->storage) {
2742                         case ArgInIReg: {
2743                                 guint32 size = 8;
2744
2745                                 /* FIXME: I1 etc */
2746                                 amd64_mov_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset, size);
2747                                 break;
2748                         }
2749                         case ArgInFloatSSEReg:
2750                                 amd64_movss_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
2751                                 break;
2752                         case ArgInDoubleSSEReg:
2753                                 amd64_movsd_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
2754                                 break;
2755                         default:
2756                                 break;
2757                         }
2758                 }
2759         }
2760
2761         g_free (cinfo);
2762
2763         return code;
2764 }
2765
2766 /*
2767  * emit_load_arguments:
2768  *
2769  *   Load arguments into the proper registers before a call.
2770  */
2771 static guint8*
2772 emit_load_arguments (MonoCompile *cfg, MonoCallInst *call, guint8 *code)
2773 {
2774         GSList *list;
2775
2776         list = call->out_reg_args;
2777         if (list) {
2778                 while (list) {
2779                         MonoInst *arg = (MonoInst*)(list->data);
2780                         amd64_mov_reg_membase (code, arg->unused, AMD64_RBP, arg->inst_left->inst_offset, 8);
2781                         list = g_slist_next (list);
2782                 }
2783                 g_slist_free (call->out_reg_args);
2784         }
2785
2786         return code;
2787 }
2788
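/*
 * REAL_PRINT_REG: debug helper which emits an inline printf of a register
 * value. Note the x86-style push-based argument passing; debugging use only.
 */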
2789 #define REAL_PRINT_REG(text,reg) \
2790 mono_assert (reg >= 0); \
2791 amd64_push_reg (code, AMD64_RAX); \
2792 amd64_push_reg (code, AMD64_RDX); \
2793 amd64_push_reg (code, AMD64_RCX); \
2794 amd64_push_reg (code, reg); \
2795 amd64_push_imm (code, reg); \
2796 amd64_push_imm (code, text " %d %p\n"); \
2797 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
2798 amd64_call_reg (code, AMD64_RAX); \
2799 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*4); \
2800 amd64_pop_reg (code, AMD64_RCX); \
2801 amd64_pop_reg (code, AMD64_RDX); \
2802 amd64_pop_reg (code, AMD64_RAX);
2803
2804 /* benchmark and set based on cpu */
2805 #define LOOP_ALIGNMENT 8
2806 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2807
2808 void
2809 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2810 {
2811         MonoInst *ins;
2812         MonoCallInst *call;
2813         guint offset;
2814         guint8 *code = cfg->native_code + cfg->code_len;
2815         MonoInst *last_ins = NULL;
2816         guint last_offset = 0;
2817         int max_len, cpos;
2818
2819         if (cfg->opt & MONO_OPT_PEEPHOLE)
2820                 peephole_pass (cfg, bb);
2821
2822         if (cfg->opt & MONO_OPT_LOOP) {
2823                 int pad, align = LOOP_ALIGNMENT;
2824                 /* set alignment depending on cpu */
2825                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2826                         pad = align - pad;
2827                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2828                         amd64_padding (code, pad);
2829                         cfg->code_len += pad;
2830                         bb->native_offset = cfg->code_len;
2831                 }
2832         }
2833
2834         if (cfg->verbose_level > 2)
2835                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2836
2837         cpos = bb->max_offset;
2838
2839         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2840                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2841                 g_assert (!mono_compile_aot);
2842                 cpos += 6;
2843
2844                 cov->data [bb->dfn].cil_code = bb->cil_code;
2845                 /* this is not thread safe, but good enough */
2846                 amd64_inc_mem (code, (guint64)&cov->data [bb->dfn].count); 
2847         }
2848
2849         offset = code - cfg->native_code;
2850
2851         ins = bb->code;
2852         while (ins) {
2853                 offset = code - cfg->native_code;
2854
2855                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2856
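		/* grow the native code buffer if the worst case length of this opcode might not fit */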
2857                 if (offset > (cfg->code_size - max_len - 16)) {
2858                         cfg->code_size *= 2;
2859                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2860                         code = cfg->native_code + offset;
2861                         mono_jit_stats.code_reallocs++;
2862                 }
2863
2864                 mono_debug_record_line_number (cfg, ins, offset);
2865
2866                 switch (ins->opcode) {
2867                 case OP_BIGMUL:
2868                         amd64_mul_reg (code, ins->sreg2, TRUE);
2869                         break;
2870                 case OP_BIGMUL_UN:
2871                         amd64_mul_reg (code, ins->sreg2, FALSE);
2872                         break;
2873                 case OP_X86_SETEQ_MEMBASE:
2874                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2875                         break;
2876                 case OP_STOREI1_MEMBASE_IMM:
2877                         g_assert (amd64_is_imm32 (ins->inst_imm));
2878                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2879                         break;
2880                 case OP_STOREI2_MEMBASE_IMM:
2881                         g_assert (amd64_is_imm32 (ins->inst_imm));
2882                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2883                         break;
2884                 case OP_STOREI4_MEMBASE_IMM:
2885                         g_assert (amd64_is_imm32 (ins->inst_imm));
2886                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2887                         break;
2888                 case OP_STOREI1_MEMBASE_REG:
2889                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2890                         break;
2891                 case OP_STOREI2_MEMBASE_REG:
2892                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2893                         break;
2894                 case OP_STORE_MEMBASE_REG:
2895                 case OP_STOREI8_MEMBASE_REG:
2896                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2897                         break;
2898                 case OP_STOREI4_MEMBASE_REG:
2899                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2900                         break;
2901                 case OP_STORE_MEMBASE_IMM:
2902                 case OP_STOREI8_MEMBASE_IMM:
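			/* mov mem, imm sign-extends a 32 bit immediate, so larger constants go through the scratch reg */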
2903                         if (amd64_is_imm32 (ins->inst_imm))
2904                                 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2905                         else {
2906                                 amd64_mov_reg_imm (code, GP_SCRATCH_REG, ins->inst_imm);
2907                                 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, GP_SCRATCH_REG, 8);
2908                         }
2909                         break;
2910                 case CEE_LDIND_I:
2911                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, sizeof (gpointer));
2912                         break;
2913                 case CEE_LDIND_I4:
2914                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2915                         break;
2916                 case CEE_LDIND_U4:
2917                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2918                         break;
2919                 case OP_LOADU4_MEM:
2920                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2921                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2922                         break;
2923                 case OP_LOAD_MEMBASE:
2924                 case OP_LOADI8_MEMBASE:
2925                         if (amd64_is_imm32 (ins->inst_offset)) {
2926                                 amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2927                         }
2928                         else {
2929                                 amd64_mov_reg_imm_size (code, GP_SCRATCH_REG, ins->inst_offset, 8);
2930                                 amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, GP_SCRATCH_REG, 0, 8);
2931                         }
2932                         break;
2933                 case OP_LOADI4_MEMBASE:
2934                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2935                         break;
2936                 case OP_LOADU4_MEMBASE:
2937                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2938                         break;
2939                 case OP_LOADU1_MEMBASE:
2940                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2941                         break;
2942                 case OP_LOADI1_MEMBASE:
2943                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2944                         break;
2945                 case OP_LOADU2_MEMBASE:
2946                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2947                         break;
2948                 case OP_LOADI2_MEMBASE:
2949                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2950                         break;
2951                 case CEE_CONV_I1:
2952                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2953                         break;
2954                 case CEE_CONV_I2:
2955                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2956                         break;
2957                 case CEE_CONV_U1:
2958                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2959                         break;
2960                 case CEE_CONV_U2:
2961                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2962                         break;
2963                 case CEE_CONV_U8:
2964                 case CEE_CONV_U:
2965                         /* Clean out the upper 32 bits */
2966                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2967                         break;
2968                 case CEE_CONV_I8:
2969                 case CEE_CONV_I:
2970                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2971                         break;                  
2972                 case OP_COMPARE:
2973                 case OP_LCOMPARE:
2974                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2975                         break;
2976                 case OP_COMPARE_IMM:
2977                         if (!amd64_is_imm32 (ins->inst_imm)) {
2978                                 amd64_mov_reg_imm (code, AMD64_R11, ins->inst_imm);
2979                                 amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, AMD64_R11);
2980                         } else {
2981                                 amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2982                         }
2983                         break;
2984                 case OP_X86_COMPARE_MEMBASE_REG:
2985                         amd64_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2986                         break;
2987                 case OP_X86_COMPARE_MEMBASE_IMM:
2988                         g_assert (amd64_is_imm32 (ins->inst_imm));
2989                         amd64_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2990                         break;
2991                 case OP_X86_COMPARE_REG_MEMBASE:
2992                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2993                         break;
2994                 case OP_X86_TEST_NULL:
2995                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
2996                         break;
2997                 case OP_X86_ADD_MEMBASE_IMM:
2998                         /* FIXME: Make a 64 bit version too */
2999                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3000                         break;
3001                 case OP_X86_ADD_MEMBASE:
3002                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3003                         break;
3004                 case OP_X86_SUB_MEMBASE_IMM:
3005                         g_assert (amd64_is_imm32 (ins->inst_imm));
3006                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3007                         break;
3008                 case OP_X86_SUB_MEMBASE:
3009                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3010                         break;
3011                 case OP_X86_INC_MEMBASE:
3012                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3013                         break;
3014                 case OP_X86_INC_REG:
3015                         amd64_inc_reg_size (code, ins->dreg, 4);
3016                         break;
3017                 case OP_X86_DEC_MEMBASE:
3018                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3019                         break;
3020                 case OP_X86_DEC_REG:
3021                         amd64_dec_reg_size (code, ins->dreg, 4);
3022                         break;
3023                 case OP_X86_MUL_MEMBASE:
3024                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3025                         break;
3026                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
3027                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3028                         break;
3029                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
3030                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3031                         break;
3032                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
3033                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3034                         break;
3035                 case CEE_BREAK:
3036                         amd64_breakpoint (code);
3037                         break;
3038
3039                 case OP_ADDCC:
3040                 case CEE_ADD:
3041                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
3042                         break;
3043                 case OP_ADC:
3044                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
3045                         break;
3046                 case OP_ADD_IMM:
3047                         g_assert (amd64_is_imm32 (ins->inst_imm));
3048                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
3049                         break;
3050                 case OP_ADC_IMM:
3051                         g_assert (amd64_is_imm32 (ins->inst_imm));
3052                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
3053                         break;
3054                 case OP_SUBCC:
3055                 case CEE_SUB:
3056                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
3057                         break;
3058                 case OP_SBB:
3059                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
3060                         break;
3061                 case OP_SUB_IMM:
3062                         g_assert (amd64_is_imm32 (ins->inst_imm));
3063                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
3064                         break;
3065                 case OP_SBB_IMM:
3066                         g_assert (amd64_is_imm32 (ins->inst_imm));
3067                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
3068                         break;
3069                 case CEE_AND:
3070                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
3071                         break;
3072                 case OP_AND_IMM:
3073                         g_assert (amd64_is_imm32 (ins->inst_imm));
3074                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
3075                         break;
3076                 case CEE_MUL:
3077                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3078                         break;
3079                 case OP_MUL_IMM:
3080                         amd64_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3081                         break;
3082                 case CEE_DIV:
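			/* sign-extend the dividend in %rax into %rdx before the signed divide */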
3083                         amd64_cdq (code);
3084                         amd64_div_reg (code, ins->sreg2, TRUE);
3085                         break;
3086                 case CEE_DIV_UN:
3087                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3088                         amd64_div_reg (code, ins->sreg2, FALSE);
3089                         break;
3090                 case OP_DIV_IMM:
3091                         g_assert (amd64_is_imm32 (ins->inst_imm));
3092                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3093                         amd64_cdq (code);
3094                         amd64_div_reg (code, ins->sreg2, TRUE);
3095                         break;
3096                 case CEE_REM:
3097                         amd64_cdq (code);
3098                         amd64_div_reg (code, ins->sreg2, TRUE);
3099                         break;
3100                 case CEE_REM_UN:
3101                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3102                         amd64_div_reg (code, ins->sreg2, FALSE);
3103                         break;
3104                 case OP_REM_IMM:
3105                         g_assert (amd64_is_imm32 (ins->inst_imm));
3106                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3107                         amd64_cdq (code);
3108                         amd64_div_reg (code, ins->sreg2, TRUE);
3109                         break;
3110                 case CEE_OR:
3111                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3112                         break;
3113                 case OP_OR_IMM:
3114                         g_assert (amd64_is_imm32 (ins->inst_imm));
3115                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3116                         break;
3117                 case CEE_XOR:
3118                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3119                         break;
3120                 case OP_XOR_IMM:
3121                         g_assert (amd64_is_imm32 (ins->inst_imm));
3122                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3123                         break;
3124                 case CEE_SHL:
3125                 case OP_LSHL:
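			/* variable shift counts must be in %cl */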
3126                         g_assert (ins->sreg2 == AMD64_RCX);
3127                         amd64_shift_reg (code, X86_SHL, ins->dreg);
3128                         break;
3129                 case CEE_SHR:
3130                 case OP_LSHR:
3131                         g_assert (ins->sreg2 == AMD64_RCX);
3132                         amd64_shift_reg (code, X86_SAR, ins->dreg);
3133                         break;
3134                 case OP_SHR_IMM:
3135                         g_assert (amd64_is_imm32 (ins->inst_imm));
3136                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3137                         break;
3138                 case OP_LSHR_IMM:
3139                         g_assert (amd64_is_imm32 (ins->inst_imm));
3140                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3141                         break;
3142                 case OP_SHR_UN_IMM:
3143                         g_assert (amd64_is_imm32 (ins->inst_imm));
3144                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3145                         break;
3146                 case OP_LSHR_UN_IMM:
3147                         g_assert (amd64_is_imm32 (ins->inst_imm));
3148                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3149                         break;
3150                 case CEE_SHR_UN:
3151                         g_assert (ins->sreg2 == AMD64_RCX);
3152                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3153                         break;
3154                 case OP_LSHR_UN:
3155                         g_assert (ins->sreg2 == AMD64_RCX);
3156                         amd64_shift_reg (code, X86_SHR, ins->dreg);
3157                         break;
3158                 case OP_SHL_IMM:
3159                         g_assert (amd64_is_imm32 (ins->inst_imm));
3160                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3161                         break;
3162                 case OP_LSHL_IMM:
3163                         g_assert (amd64_is_imm32 (ins->inst_imm));
3164                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3165                         break;
3166
3167                 case OP_IADDCC:
3168                 case OP_IADD:
3169                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
3170                         break;
3171                 case OP_IADC:
3172                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
3173                         break;
3174                 case OP_IADD_IMM:
3175                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
3176                         break;
3177                 case OP_IADC_IMM:
3178                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
3179                         break;
3180                 case OP_ISUBCC:
3181                 case OP_ISUB:
3182                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
3183                         break;
3184                 case OP_ISBB:
3185                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
3186                         break;
3187                 case OP_ISUB_IMM:
3188                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
3189                         break;
3190                 case OP_ISBB_IMM:
3191                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
3192                         break;
3193                 case OP_IAND:
3194                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
3195                         break;
3196                 case OP_IAND_IMM:
3197                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
3198                         break;
3199                 case OP_IOR:
3200                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
3201                         break;
3202                 case OP_IOR_IMM:
3203                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
3204                         break;
3205                 case OP_IXOR:
3206                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
3207                         break;
3208                 case OP_IXOR_IMM:
3209                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
3210                         break;
3211                 case OP_INEG:
3212                         amd64_neg_reg_size (code, ins->sreg1, 4);
3213                         break;
3214                 case OP_INOT:
3215                         amd64_not_reg_size (code, ins->sreg1, 4);
3216                         break;
3217                 case OP_ISHL:
3218                         g_assert (ins->sreg2 == AMD64_RCX);
3219                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
3220                         break;
3221                 case OP_ISHR:
3222                         g_assert (ins->sreg2 == AMD64_RCX);
3223                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
3224                         break;
3225                 case OP_ISHR_IMM:
3226                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3227                         break;
3228                 case OP_ISHR_UN_IMM:
3229                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3230                         break;
3231                 case OP_ISHR_UN:
3232                         g_assert (ins->sreg2 == AMD64_RCX);
3233                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3234                         break;
3235                 case OP_ISHL_IMM:
3236                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3237                         break;
3238                 case OP_IMUL:
3239                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3240                         break;
3241                 case OP_IMUL_IMM:
3242                         amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, 4);
3243                         break;
3244                 case OP_IMUL_OVF:
3245                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3246                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3247                         break;
3248                 case OP_IMUL_OVF_UN: {
3249                         /* the mul operation and the exception check should most likely be split */
3250                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3251                         /*g_assert (ins->sreg2 == X86_EAX);
3252                         g_assert (ins->dreg == X86_EAX);*/
3253                         if (ins->sreg2 == X86_EAX) {
3254                                 non_eax_reg = ins->sreg1;
3255                         } else if (ins->sreg1 == X86_EAX) {
3256                                 non_eax_reg = ins->sreg2;
3257                         } else {
3258                                 /* no need to save since we're going to store to it anyway */
3259                                 if (ins->dreg != X86_EAX) {
3260                                         saved_eax = TRUE;
3261                                         amd64_push_reg (code, X86_EAX);
3262                                 }
3263                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3264                                 non_eax_reg = ins->sreg2;
3265                         }
3266                         if (ins->dreg == X86_EDX) {
3267                                 if (!saved_eax) {
3268                                         saved_eax = TRUE;
3269                                         amd64_push_reg (code, X86_EAX);
3270                                 }
3271                         } else if (ins->dreg != X86_EAX) {
3272                                 saved_edx = TRUE;
3273                                 amd64_push_reg (code, X86_EDX);
3274                         }
3275                         amd64_mul_reg_size (code, non_eax_reg, FALSE, 4);
3276                         /* save before the check since pop and mov don't change the flags */
3277                         if (ins->dreg != X86_EAX)
3278                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3279                         if (saved_edx)
3280                                 amd64_pop_reg (code, X86_EDX);
3281                         if (saved_eax)
3282                                 amd64_pop_reg (code, X86_EAX);
3283                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3284                         break;
3285                 }
3286                 case OP_IDIV:
3287                         amd64_cdq_size (code, 4);
3288                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3289                         break;
3290                 case OP_IDIV_UN:
3291                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3292                         amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
3293                         break;
3294                 case OP_IDIV_IMM:
3295                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3296                         amd64_cdq_size (code, 4);
3297                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3298                         break;
3299                 case OP_IREM:
3300                         amd64_cdq_size (code, 4);
3301                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3302                         break;
3303                 case OP_IREM_UN:
3304                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3305                         amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
3306                         break;
3307                 case OP_IREM_IMM:
3308                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3309                         amd64_cdq_size (code, 4);
3310                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3311                         break;
3312
3313                 case OP_ICOMPARE:
3314                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3315                         break;
3316                 case OP_ICOMPARE_IMM:
3317                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
3318                         break;
3319
3320                 case OP_IBEQ:
3321                 case OP_IBLT:
3322                 case OP_IBGT:
3323                 case OP_IBGE:
3324                 case OP_IBLE:
3325                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), TRUE);
3326                         break;
3327                 case OP_IBNE_UN:
3328                 case OP_IBLT_UN:
3329                 case OP_IBGT_UN:
3330                 case OP_IBGE_UN:
3331                 case OP_IBLE_UN:
3332                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), FALSE);
3333                         break;
3334                 case OP_COND_EXC_IOV:
3335                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
3336                                                                                 TRUE, ins->inst_p1);
3337                         break;
3338                 case OP_COND_EXC_IC:
3339                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
3340                                                                                 FALSE, ins->inst_p1);
3341                         break;
3342                 case CEE_NOT:
3343                         amd64_not_reg (code, ins->sreg1);
3344                         break;
3345                 case CEE_NEG:
3346                         amd64_neg_reg (code, ins->sreg1);
3347                         break;
3348                 case OP_SEXT_I1:
3349                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3350                         break;
3351                 case OP_SEXT_I2:
3352                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3353                         break;
3354                 case OP_ICONST:
3355                 case OP_I8CONST:
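			/* a 32 bit mov zero-extends, so constants with a zero upper half get the shorter encoding */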
3356                         if ((((guint64)ins->inst_c0) >> 32) == 0)
3357                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
3358                         else
3359                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
3360                         break;
3361                 case OP_AOTCONST:
3362                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3363                         amd64_set_reg_template (code, ins->dreg);
3364                         break;
3365                 case CEE_CONV_I4:
3366                 case CEE_CONV_U4:
3367                 case OP_MOVE:
3368                 case OP_SETREG:
3369                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
3370                         break;
3371                 case OP_AMD64_SET_XMMREG_R4: {
3372                         /* FIXME: optimize this */
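			/* spill the x87 top-of-stack just below %rsp (into the red zone) and reload it as SSE */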
3373                         amd64_fst_membase (code, AMD64_RSP, -8, FALSE, TRUE);
3374                         /* ins->dreg is set to -1 by the reg allocator */
3375                         amd64_movss_reg_membase (code, ins->unused, AMD64_RSP, -8);
3376                         break;
3377                 }
3378                 case OP_AMD64_SET_XMMREG_R8: {
3379                         /* FIXME: optimize this */
3380                         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE);
3381                         /* ins->dreg is set to -1 by the reg allocator */
3382                         amd64_movsd_reg_membase (code, ins->unused, AMD64_RSP, -8);
3383                         break;
3384                 }
3385                 case CEE_JMP: {
3386                         /*
3387                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3388                          * Keep in sync with the code in emit_epilog.
3389                          */
3390                         int pos = 0, i;
3391
3392                         /* FIXME: no tracing support... */
3393                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3394                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3395
3396                         g_assert (!cfg->method->save_lmf);
3397
3398                         code = emit_load_volatile_arguments (cfg, code);
3399
3400                         for (i = 0; i < AMD64_NREG; ++i)
3401                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
3402                                         pos -= sizeof (gpointer);
3403                         
3404                         if (pos)
3405                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
3406
3407                         /* Pop registers in reverse order */
3408                         for (i = AMD64_NREG - 1; i > 0; --i)
3409                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3410                                         amd64_pop_reg (code, i);
3411                                 }
3412
3413                         amd64_leave (code);
3414                         offset = code - cfg->native_code;
3415                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3416                         amd64_set_reg_template (code, AMD64_R11);
3417                         amd64_jump_reg (code, AMD64_R11);
3418                         break;
3419                 }
3420                 case OP_CHECK_THIS:
3421                         /* ensure ins->sreg1 is not NULL */
3422                         amd64_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
3423                         break;
3424                 case OP_ARGLIST: {
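			/* store the address of the signature cookie area at the ARGLIST destination */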
3425                         int hreg = ins->sreg1 == AMD64_RAX? AMD64_RCX: AMD64_RAX;
3426                         amd64_push_reg (code, hreg);
3427                         amd64_lea_membase (code, hreg, AMD64_RBP, cfg->sig_cookie);
3428                         amd64_mov_membase_reg (code, ins->sreg1, 0, hreg, 8);
3429                         amd64_pop_reg (code, hreg);
3430                         break;
3431                 }
3432                 case OP_FCALL:
3433                 case OP_LCALL:
3434                 case OP_VCALL:
3435                 case OP_VOIDCALL:
3436                 case CEE_CALL:
3437                         call = (MonoCallInst*)ins;
3438                         /*
3439                          * The AMD64 ABI forces callers to know about varargs.
3440                          */
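                             /* The ABI also expects %al to hold the number of SSE registers used
                              * by a varargs call; clear it since none are used here. */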
3441                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
3442                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3443
3444                         code = emit_load_arguments (cfg, call, code);
3445
3446                         if (ins->flags & MONO_INST_HAS_METHOD)
3447                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3448                         else
3449                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3450                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3451                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3452                         code = emit_move_return_value (cfg, ins, code);
3453                         break;
3454                 case OP_FCALL_REG:
3455                 case OP_LCALL_REG:
3456                 case OP_VCALL_REG:
3457                 case OP_VOIDCALL_REG:
3458                 case OP_CALL_REG:
3459                         call = (MonoCallInst*)ins;
3460
3461                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3462                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3463                                 ins->sreg1 = AMD64_R11;
3464                         }
3465
3466                         code = emit_load_arguments (cfg, call, code);
3467
3468                         /*
3469                          * The AMD64 ABI forces callers to know about varargs.
3470                          */
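                             /* Clear %al (no SSE registers are used); if the call target lives in
                              * %rax it is moved to %r11 first so the xor does not clobber it. */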
3471                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
3472                                 if (ins->sreg1 == AMD64_RAX) {
3473                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
3474                                         ins->sreg1 = AMD64_R11;
3475                                 }
3476                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3477                         }
3478                         amd64_call_reg (code, ins->sreg1);
3479                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3480                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3481                         code = emit_move_return_value (cfg, ins, code);
3482                         break;
3483                 case OP_FCALL_MEMBASE:
3484                 case OP_LCALL_MEMBASE:
3485                 case OP_VCALL_MEMBASE:
3486                 case OP_VOIDCALL_MEMBASE:
3487                 case OP_CALL_MEMBASE:
3488                         call = (MonoCallInst*)ins;
3489
3490                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3491                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3492                                 ins->sreg1 = AMD64_R11;
3493                         }
3494
3495                         code = emit_load_arguments (cfg, call, code);
3496
3497                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
3498                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3499                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3500                         code = emit_move_return_value (cfg, ins, code);
3501                         break;
3502                 case OP_OUTARG:
3503                 case OP_X86_PUSH:
3504                         amd64_push_reg (code, ins->sreg1);
3505                         break;
3506                 case OP_X86_PUSH_IMM:
3507                         g_assert (amd64_is_imm32 (ins->inst_imm));
3508                         amd64_push_imm (code, ins->inst_imm);
3509                         break;
3510                 case OP_X86_PUSH_MEMBASE:
3511                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
3512                         break;
3513                 case OP_X86_PUSH_OBJ: 
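                             /* Push a value type: make room on the stack, then copy it there with
                              * rep movs, preserving RDI/RSI/RCX around the copy. The destination
                              * skips the three saved registers (3 * 8 bytes above %rsp). */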
3514                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
3515                         amd64_push_reg (code, AMD64_RDI);
3516                         amd64_push_reg (code, AMD64_RSI);
3517                         amd64_push_reg (code, AMD64_RCX);
3518                         if (ins->inst_offset)
3519                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
3520                         else
3521                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
3522                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
3523                         amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
3524                         amd64_cld (code);
3525                         amd64_prefix (code, X86_REP_PREFIX);
3526                         amd64_movsd (code);
3527                         amd64_pop_reg (code, AMD64_RCX);
3528                         amd64_pop_reg (code, AMD64_RSI);
3529                         amd64_pop_reg (code, AMD64_RDI);
3530                         break;
3531                 case OP_X86_LEA:
3532                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3533                         break;
3534                 case OP_X86_LEA_MEMBASE:
3535                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3536                         break;
3537                 case OP_X86_XCHG:
3538                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3539                         break;
3540                 case OP_LOCALLOC:
3541                         /* keep alignment */
3542                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3543                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3544                         code = mono_emit_stack_alloc (code, ins);
3545                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3546                         break;
3547                 case CEE_RET:
3548                         amd64_ret (code);
3549                         break;
3550                 case CEE_THROW: {
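                             /* The exception object goes in %rdi, the first integer argument register */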
3551                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
3552                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3553                                              (gpointer)"mono_arch_throw_exception");
3554                         break;
3555                 }
3556                 case OP_CALL_HANDLER: 
3557                         /* Align stack */
3558                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
3559                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3560                         amd64_call_imm (code, 0);
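                             /* The call target (the handler entry) is filled in later via the
                              * MONO_PATCH_INFO_BB patch added above. */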
3561                         /* Restore stack alignment */
3562                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3563                         break;
3564                 case OP_LABEL:
3565                         ins->inst_c0 = code - cfg->native_code;
3566                         break;
3567                 case CEE_BR:
3568                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3569                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3570                         //break;
3571                         if (ins->flags & MONO_INST_BRLABEL) {
3572                                 if (ins->inst_i0->inst_c0) {
3573                                         amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3574                                 } else {
3575                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3576                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3577                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3578                                                 x86_jump8 (code, 0);
3579                                         else 
3580                                                 x86_jump32 (code, 0);
3581                                 }
3582                         } else {
3583                                 if (ins->inst_target_bb->native_offset) {
3584                                         amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3585                                 } else {
3586                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3587                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3588                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3589                                                 x86_jump8 (code, 0);
3590                                         else 
3591                                                 x86_jump32 (code, 0);
3592                                 } 
3593                         }
3594                         break;
3595                 case OP_BR_REG:
3596                         amd64_jump_reg (code, ins->sreg1);
3597                         break;
3598                 case OP_CEQ:
3599                 case OP_ICEQ:
3600                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3601                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3602                         break;
3603                 case OP_CLT:
3604                 case OP_ICLT:
3605                         amd64_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3606                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3607                         break;
3608                 case OP_CLT_UN:
3609                 case OP_ICLT_UN:
3610                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3611                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3612                         break;
3613                 case OP_CGT:
3614                 case OP_ICGT:
3615                         amd64_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3616                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3617                         break;
3618                 case OP_CGT_UN:
3619                 case OP_ICGT_UN:
3620                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3621                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3622                         break;
3623                 case OP_COND_EXC_EQ:
3624                 case OP_COND_EXC_NE_UN:
3625                 case OP_COND_EXC_LT:
3626                 case OP_COND_EXC_LT_UN:
3627                 case OP_COND_EXC_GT:
3628                 case OP_COND_EXC_GT_UN:
3629                 case OP_COND_EXC_GE:
3630                 case OP_COND_EXC_GE_UN:
3631                 case OP_COND_EXC_LE:
3632                 case OP_COND_EXC_LE_UN:
3633                 case OP_COND_EXC_OV:
3634                 case OP_COND_EXC_NO:
3635                 case OP_COND_EXC_C:
3636                 case OP_COND_EXC_NC:
3637                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3638                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3639                         break;
3640                 case CEE_BEQ:
3641                 case CEE_BNE_UN:
3642                 case CEE_BLT:
3643                 case CEE_BLT_UN:
3644                 case CEE_BGT:
3645                 case CEE_BGT_UN:
3646                 case CEE_BGE:
3647                 case CEE_BGE_UN:
3648                 case CEE_BLE:
3649                 case CEE_BLE_UN:
3650                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3651                         break;
3652
3653                 /* floating point opcodes */
3654                 case OP_R8CONST: {
3655                         double d = *(double *)ins->inst_p0;
3656
3657                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3658                                 amd64_fldz (code);
3659                         } else if (d == 1.0) {
3660                                 x86_fld1 (code);
3661                         } else {
3662                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
3663                                 amd64_fld_membase (code, AMD64_RIP, 0, TRUE);
3664                         }
3665                         break;
3666                 }
3667                 case OP_R4CONST: {
3668                         float f = *(float *)ins->inst_p0;
3669
3670                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3671                                 amd64_fldz (code);
3672                         } else if (f == 1.0) {
3673                                 x86_fld1 (code);
3674                         } else {
3675                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
3676                                 amd64_fld_membase (code, AMD64_RIP, 0, FALSE);
3677                         }
3678                         break;
3679                 }
3680                 case OP_STORER8_MEMBASE_REG:
3681                         amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3682                         break;
3683                 case OP_LOADR8_SPILL_MEMBASE:
3684                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3685                         amd64_fxch (code, 1);
3686                         break;
3687                 case OP_LOADR8_MEMBASE:
3688                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3689                         break;
3690                 case OP_STORER4_MEMBASE_REG:
3691                         amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3692                         break;
3693                 case OP_LOADR4_MEMBASE:
3694                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3695                         break;
3696                 case CEE_CONV_R4: /* FIXME: change precision */
3697                 case CEE_CONV_R8:
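                             /* Transfer the integer to the FPU through the stack: push it,
                              * fild it, then free the slot. */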
3698                         amd64_push_reg (code, ins->sreg1);
3699                         amd64_fild_membase (code, AMD64_RSP, 0, FALSE);
3700                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3701                         break;
3702                 case CEE_CONV_R_UN:
3703                         /* Emulated */
3704                         g_assert_not_reached ();
3705                         break;
3706                 case OP_LCONV_TO_R4: /* FIXME: change precision */
3707                 case OP_LCONV_TO_R8:
3708                         amd64_push_reg (code, ins->sreg1);
3709                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3710                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3711                         break;
3712                 case OP_X86_FP_LOAD_I8:
3713                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3714                         break;
3715                 case OP_X86_FP_LOAD_I4:
3716                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3717                         break;
3718                 case OP_FCONV_TO_I1:
3719                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3720                         break;
3721                 case OP_FCONV_TO_U1:
3722                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3723                         break;
3724                 case OP_FCONV_TO_I2:
3725                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3726                         break;
3727                 case OP_FCONV_TO_U2:
3728                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3729                         break;
3730                 case OP_FCONV_TO_I4:
3731                 case OP_FCONV_TO_I:
3732                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3733                         break;
3734                 case OP_FCONV_TO_I8:
3735                         code = emit_float_to_int (cfg, code, ins->dreg, 8, TRUE);
3736                         break;
3737                 case OP_LCONV_TO_R_UN: { 
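                             /* The constant below (mn) is 2^64 as an 80 bit extended precision value;
                              * it is added when the value is negative, turning the signed fild result
                              * into the unsigned value. FIXME: the stack handling still mirrors the
                              * 32 bit x86 version (three 8 byte pushes undone by an add of 12). */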
3738                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3739                         guint8 *br;
3740
3741                         /* load 64bit integer to FP stack */
3742                         amd64_push_imm (code, 0);
3743                         amd64_push_reg (code, ins->sreg2);
3744                         amd64_push_reg (code, ins->sreg1);
3745                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3746                         /* store as 80bit FP value */
3747                         x86_fst80_membase (code, AMD64_RSP, 0);
3748                         
3749                         /* test if lreg is negative */
3750                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3751                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3752         
3753                         /* add correction constant mn */
3754                         x86_fld80_mem (code, mn);
3755                         x86_fld80_membase (code, AMD64_RSP, 0);
3756                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3757                         x86_fst80_membase (code, AMD64_RSP, 0);
3758
3759                         amd64_patch (br, code);
3760
3761                         x86_fld80_membase (code, AMD64_RSP, 0);
3762                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 12);
3763
3764                         break;
3765                 }
3766                 case OP_LCONV_TO_OVF_I: {
3767                         guint8 *br [3], *label [1];
3768
3769                         /* 
3770                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3771                          */
3772                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3773
3774                         /* If the low word top bit is set, see if we are negative */
3775                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3776                         /* We are not negative (no top bit set); check that the top word is zero */
3777                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3778                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3779                         label [0] = code;
3780
3781                         /* throw exception */
3782                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3783                         x86_jump32 (code, 0);
3784         
3785                         amd64_patch (br [0], code);
3786                         /* our top bit is set, check that the top word is 0xffffffff */
3787                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3788                 
3789                         amd64_patch (br [1], code);
3790                         /* no match: branch back to the exception throw emitted above */
3791                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3792                         amd64_patch (br [2], label [0]);
3793
3794                         if (ins->dreg != ins->sreg1)
3795                                 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3796                         break;
3797                 }
3798                 case CEE_CONV_OVF_U4:
3799                         /* FIXME: */
3800                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3801                         break;
3802                 case OP_FADD:
3803                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3804                         break;
3805                 case OP_FSUB:
3806                         amd64_fp_op_reg (code, X86_FSUB, 1, TRUE);
3807                         break;          
3808                 case OP_FMUL:
3809                         amd64_fp_op_reg (code, X86_FMUL, 1, TRUE);
3810                         break;          
3811                 case OP_FDIV:
3812                         amd64_fp_op_reg (code, X86_FDIV, 1, TRUE);
3813                         break;          
3814                 case OP_FNEG:
3815                         amd64_fchs (code);
3816                         break;          
3817                 case OP_SIN:
3818                         amd64_fsin (code);
3819                         amd64_fldz (code);
3820                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3821                         break;          
3822                 case OP_COS:
3823                         amd64_fcos (code);
3824                         amd64_fldz (code);
3825                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3826                         break;          
3827                 case OP_ABS:
3828                         amd64_fabs (code);
3829                         break;          
3830                 case OP_TAN: {
3831                         /* 
3832                          * it really doesn't make sense to inline all this code,
3833                          * it's here just to show that things may not be as simple 
3834                          * as they appear.
3835                          */
3836                         guchar *check_pos, *end_tan, *pop_jump;
3837                         amd64_push_reg (code, AMD64_RAX);
3838                         amd64_fptan (code);
3839                         amd64_fnstsw (code);
3840                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3841                         check_pos = code;
3842                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3843                         amd64_fstp (code, 0); /* pop the 1.0 */
3844                         end_tan = code;
3845                         x86_jump8 (code, 0);
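                             /* C2 was set: the argument is out of range for fptan, so reduce it
                              * modulo 2*pi (fldpi, then the fadd doubles it) and try again. */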
3846                         amd64_fldpi (code);
3847                         amd64_fp_op (code, X86_FADD, 0);
3848                         amd64_fxch (code, 1);
3849                         x86_fprem1 (code);
3850                         amd64_fstsw (code);
3851                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3852                         pop_jump = code;
3853                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3854                         amd64_fstp (code, 1);
3855                         amd64_fptan (code);
3856                         amd64_patch (pop_jump, code);
3857                         amd64_fstp (code, 0); /* pop the 1.0 */
3858                         amd64_patch (check_pos, code);
3859                         amd64_patch (end_tan, code);
3860                         amd64_fldz (code);
3861                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3862                         amd64_pop_reg (code, AMD64_RAX);
3863                         break;
3864                 }
3865                 case OP_ATAN:
3866                         x86_fld1 (code);
3867                         amd64_fpatan (code);
3868                         amd64_fldz (code);
3869                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3870                         break;          
3871                 case OP_SQRT:
3872                         amd64_fsqrt (code);
3873                         break;          
3874                 case OP_X86_FPOP:
3875                         amd64_fstp (code, 0);
3876                         break;          
3877                 case OP_FREM: {
3878                         guint8 *l1, *l2;
3879
3880                         amd64_push_reg (code, AMD64_RAX);
3881                         /* we need to exchange ST(0) with ST(1) */
3882                         amd64_fxch (code, 1);
3883
3884                         /* this requires a loop, because fprem sometimes 
3885                          * returns a partial remainder */
3886                         l1 = code;
3887                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3888                         /* x86_fprem1 (code); */
3889                         amd64_fprem (code);
3890                         amd64_fnstsw (code);
3891                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2);
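                             /* C2 stays set while fprem has only produced a partial remainder, so
                              * loop until it is clear; the 8 bit displacement is relative to the
                              * end of the 2 byte jcc, hence l2 = code + 2. */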
3892                         l2 = code + 2;
3893                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3894
3895                         /* pop result */
3896                         amd64_fstp (code, 1);
3897
3898                         amd64_pop_reg (code, AMD64_RAX);
3899                         break;
3900                 }
3901                 case OP_FCOMPARE:
3902                         if (cfg->opt & MONO_OPT_FCMOV) {
3903                                 amd64_fcomip (code, 1);
3904                                 amd64_fstp (code, 0);
3905                                 break;
3906                         }
3907                         /* this overwrites EAX */
3908                         EMIT_FPCOMPARE(code);
3909                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3910                         break;
3911                 case OP_FCEQ:
3912                         if (cfg->opt & MONO_OPT_FCMOV) {
3913                                 /* zeroing the register at the start results in 
3914                                  * shorter and faster code (we can also remove the widening op)
3915                                  */
3916                                 guchar *unordered_check;
3917                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3918                                 amd64_fcomip (code, 1);
3919                                 amd64_fstp (code, 0);
3920                                 unordered_check = code;
3921                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3922                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3923                                 amd64_patch (unordered_check, code);
3924                                 break;
3925                         }
3926                         if (ins->dreg != AMD64_RAX) 
3927                                 amd64_push_reg (code, AMD64_RAX);
3928
3929                         EMIT_FPCOMPARE(code);
3930                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3931                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3932                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3933                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3934
3935                         if (ins->dreg != AMD64_RAX) 
3936                                 amd64_pop_reg (code, AMD64_RAX);
3937                         break;
3938                 case OP_FCLT:
3939                 case OP_FCLT_UN:
3940                         if (cfg->opt & MONO_OPT_FCMOV) {
3941                                 /* zeroing the register at the start results in 
3942                                  * shorter and faster code (we can also remove the widening op)
3943                                  */
3944                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3945                                 amd64_fcomip (code, 1);
3946                                 amd64_fstp (code, 0);
3947                                 if (ins->opcode == OP_FCLT_UN) {
3948                                         guchar *unordered_check = code;
3949                                         guchar *jump_to_end;
3950                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3951                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3952                                         jump_to_end = code;
3953                                         x86_jump8 (code, 0);
3954                                         amd64_patch (unordered_check, code);
3955                                         amd64_inc_reg (code, ins->dreg);
3956                                         amd64_patch (jump_to_end, code);
3957                                 } else {
3958                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3959                                 }
3960                                 break;
3961                         }
3962                         if (ins->dreg != AMD64_RAX) 
3963                                 amd64_push_reg (code, AMD64_RAX);
3964
3965                         EMIT_FPCOMPARE(code);
3966                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3967                         if (ins->opcode == OP_FCLT_UN) {
3968                                 guchar *is_not_zero_check, *end_jump;
3969                                 is_not_zero_check = code;
3970                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3971                                 end_jump = code;
3972                                 x86_jump8 (code, 0);
3973                                 amd64_patch (is_not_zero_check, code);
3974                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3975
3976                                 amd64_patch (end_jump, code);
3977                         }
3978                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3979                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3980
3981                         if (ins->dreg != AMD64_RAX) 
3982                                 amd64_pop_reg (code, AMD64_RAX);
3983                         break;
3984                 case OP_FCGT:
3985                 case OP_FCGT_UN:
3986                         if (cfg->opt & MONO_OPT_FCMOV) {
3987                                 /* zeroing the register at the start results in 
3988                                  * shorter and faster code (we can also remove the widening op)
3989                                  */
3990                                 guchar *unordered_check;
3991                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3992                                 amd64_fcomip (code, 1);
3993                                 amd64_fstp (code, 0);
3994                                 if (ins->opcode == OP_FCGT) {
3995                                         unordered_check = code;
3996                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3997                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3998                                         amd64_patch (unordered_check, code);
3999                                 } else {
4000                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4001                                 }
4002                                 break;
4003                         }
4004                         if (ins->dreg != AMD64_RAX) 
4005                                 amd64_push_reg (code, AMD64_RAX);
4006
4007                         EMIT_FPCOMPARE(code);
4008                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
4009                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4010                         if (ins->opcode == OP_FCGT_UN) {
4011                                 guchar *is_not_zero_check, *end_jump;
4012                                 is_not_zero_check = code;
4013                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4014                                 end_jump = code;
4015                                 x86_jump8 (code, 0);
4016                                 amd64_patch (is_not_zero_check, code);
4017                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4018
4019                                 amd64_patch (end_jump, code);
4020                         }
4021                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
4022                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4023
4024                         if (ins->dreg != AMD64_RAX) 
4025                                 amd64_pop_reg (code, AMD64_RAX);
4026                         break;
4027                 case OP_FBEQ:
4028                         if (cfg->opt & MONO_OPT_FCMOV) {
4029                                 guchar *jump = code;
4030                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
4031                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4032                                 amd64_patch (jump, code);
4033                                 break;
4034                         }
4035                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
4036                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
4037                         break;
4038                 case OP_FBNE_UN:
4039                         /* Branch if C013 != 100 */
4040                         if (cfg->opt & MONO_OPT_FCMOV) {
4041                                 /* branch if !ZF or (PF|CF) */
4042                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4043                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4044                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
4045                                 break;
4046                         }
4047                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
4048                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4049                         break;
4050                 case OP_FBLT:
4051                         if (cfg->opt & MONO_OPT_FCMOV) {
4052                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4053                                 break;
4054                         }
4055                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4056                         break;
4057                 case OP_FBLT_UN:
4058                         if (cfg->opt & MONO_OPT_FCMOV) {
4059                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4060                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4061                                 break;
4062                         }
4063                         if (ins->opcode == OP_FBLT_UN) {
4064                                 guchar *is_not_zero_check, *end_jump;
4065                                 is_not_zero_check = code;
4066                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4067                                 end_jump = code;
4068                                 x86_jump8 (code, 0);
4069                                 amd64_patch (is_not_zero_check, code);
4070                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4071
4072                                 amd64_patch (end_jump, code);
4073                         }
4074                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4075                         break;
4076                 case OP_FBGT:
4077                 case OP_FBGT_UN:
4078                         if (cfg->opt & MONO_OPT_FCMOV) {
4079                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
4080                                 break;
4081                         }
4082                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4083                         if (ins->opcode == OP_FBGT_UN) {
4084                                 guchar *is_not_zero_check, *end_jump;
4085                                 is_not_zero_check = code;
4086                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4087                                 end_jump = code;
4088                                 x86_jump8 (code, 0);
4089                                 amd64_patch (is_not_zero_check, code);
4090                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4091
4092                                 amd64_patch (end_jump, code);
4093                         }
4094                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4095                         break;
4096                 case OP_FBGE:
4097                         /* Branch if C013 == 100 or 001 */
4098                         if (cfg->opt & MONO_OPT_FCMOV) {
4099                                 guchar *br1;
4100
4101                                 /* skip branch if C1=1 */
4102                                 br1 = code;
4103                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4104                                 /* branch if (C0 | C3) = 1 */
4105                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
4106                                 amd64_patch (br1, code);
4107                                 break;
4108                         }
4109                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4110                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4111                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
4112                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4113                         break;
4114                 case OP_FBGE_UN:
4115                         /* Branch if C013 == 000 */
4116                         if (cfg->opt & MONO_OPT_FCMOV) {
4117                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
4118                                 break;
4119                         }
4120                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4121                         break;
4122                 case OP_FBLE:
4123                         /* Branch if C013=000 or 100 */
4124                         if (cfg->opt & MONO_OPT_FCMOV) {
4125                                 guchar *br1;
4126
4127                                 /* skip branch if C1=1 */
4128                                 br1 = code;
4129                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4130                                 /* branch if C0=0 */
4131                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
4132                                 amd64_patch (br1, code);
4133                                 break;
4134                         }
4135                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1));
4136                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4137                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4138                         break;
4139                 case OP_FBLE_UN:
4140                         /* Branch if C013 != 001 */
4141                         if (cfg->opt & MONO_OPT_FCMOV) {
4142                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4143                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4144                                 break;
4145                         }
4146                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4147                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4148                         break;
4149                 case CEE_CKFINITE: {
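                             /* fxam leaves C0 set with C3 clear for NaNs and infinities; the masked
                              * compare below matches exactly those and raises ArithmeticException. */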
4150                         amd64_push_reg (code, AMD64_RAX);
4151                         amd64_fxam (code);
4152                         amd64_fnstsw (code);
4153                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
4154                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4155                         amd64_pop_reg (code, AMD64_RAX);
4156                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4157                         break;
4158                 }
4159                 case OP_X86_TLS_GET: {
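                             /* Read thread local storage: a load at the given offset from the
                              * FS segment base. */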
4160                         x86_prefix (code, X86_FS_PREFIX);
4161                         amd64_mov_reg_mem (code, ins->dreg, ins->inst_offset, 8);
4162                         break;
4163                 }
4164                 default:
4165                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4166                         g_assert_not_reached ();
4167                 }
4168
4169                 if ((code - cfg->native_code - offset) > max_len) {
4170                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
4171                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4172                         g_assert_not_reached ();
4173                 }
4174                
4175                 cpos += max_len;
4176
4177                 last_ins = ins;
4178                 last_offset = offset;
4179                 
4180                 ins = ins->next;
4181         }
4182
4183         cfg->code_len = code - cfg->native_code;
4184 }
4185
4186 void
4187 mono_arch_register_lowlevel_calls (void)
4188 {
4189 }
4190
4191 void
4192 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4193 {
4194         MonoJumpInfo *patch_info;
4195
4196         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4197                 unsigned char *ip = patch_info->ip.i + code;
4198                 const unsigned char *target;
4199
4200                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4201
4202                 switch (patch_info->type) {
4203                 case MONO_PATCH_INFO_METHOD_REL:
4204                 case MONO_PATCH_INFO_METHOD_JUMP:
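                             /* These patches rewrite the 64 bit immediate of a mov reg, imm64;
                              * the immediate starts at ip + 2 (REX prefix + opcode byte). */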
4205                         *((gconstpointer *)(ip + 2)) = target;
4206                         continue;
4207                 case MONO_PATCH_INFO_SWITCH: {
4208                         *((gconstpointer *)(ip + 2)) = target;
4209                         continue;
4210                 }
4211                 case MONO_PATCH_INFO_IID:
4212                         *((guint32 *)(ip + 2)) = (guint32)(guint64)target;
4213                         continue;                       
4214                 case MONO_PATCH_INFO_CLASS_INIT: {
4215                         /* FIXME: Might already have been changed to a nop */
4216                         *((gconstpointer *)(ip + 2)) = target;
4217                         continue;
4218                 }
4219                 case MONO_PATCH_INFO_R8:
4220                 case MONO_PATCH_INFO_R4:
4221                         g_assert_not_reached ();
4222                         continue;
4223                 case MONO_PATCH_INFO_METHODCONST:
4224                 case MONO_PATCH_INFO_CLASS:
4225                 case MONO_PATCH_INFO_IMAGE:
4226                 case MONO_PATCH_INFO_FIELD:
4227                 case MONO_PATCH_INFO_VTABLE:
4228                 case MONO_PATCH_INFO_SFLDA:
4229                 case MONO_PATCH_INFO_EXC_NAME:
4230                 case MONO_PATCH_INFO_LDSTR:
4231                 case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
4232                 case MONO_PATCH_INFO_LDTOKEN:
4233                 case MONO_PATCH_INFO_IP:
4234                         *((gconstpointer *)(ip + 2)) = target;
4235                         continue;
4236                 case MONO_PATCH_INFO_METHOD:
4237                         *((gconstpointer *)(ip + 2)) = target;
4238                         continue;
4239                 case MONO_PATCH_INFO_ABS:
4240                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4241                         break;
4242                 default:
4243                         break;
4244                 }
4245                 amd64_patch (ip, (gpointer)target);
4246         }
4247 }
4248
4249 guint8 *
4250 mono_arch_emit_prolog (MonoCompile *cfg)
4251 {
4252         MonoMethod *method = cfg->method;
4253         MonoBasicBlock *bb;
4254         MonoMethodSignature *sig;
4255         MonoInst *inst;
4256         int alloc_size, pos, max_offset, i;
4257         guint8 *code;
4258         CallInfo *cinfo;
4259
4260         cfg->code_size = MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
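             /* Start with a generous buffer; the final length is asserted
              * against code_size at the end of this function. */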
4261         code = cfg->native_code = g_malloc (cfg->code_size);
4262
4263         amd64_push_reg (code, AMD64_RBP);
4264         amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
4265
4266         /* Stack alignment check */
4267 #if 0
4268         {
4269                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
4270                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
4271                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4272                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
4273                 amd64_breakpoint (code);
4274         }
4275 #endif
4276
4277         alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
4278         pos = 0;
4279
4280         if (method->save_lmf) {
4281
4282                 pos = ALIGN_TO (pos + sizeof (MonoLMF), 16);
4283
4284                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, pos);
4285
4286                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4287
4288                 /* Save ip */
4289                 amd64_lea_membase (code, AMD64_R11, AMD64_RIP, 0);
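                     /* A zero RIP relative displacement yields the address of the next instruction */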
4290                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, 8);
4291                 /* Save fp */
4292                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), AMD64_RBP, 8);
4293                 /* Save method */
4294                 /* FIXME: add a relocation for this */
4295                 if (IS_IMM32 (cfg->method))
4296                         amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), (guint64)cfg->method, 8);
4297                 else {
4298                         amd64_mov_reg_imm (code, AMD64_R11, cfg->method);
4299                         amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 8);
4300                 }
4301                 /* Save callee saved regs */
4302                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
4303                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
4304                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
4305                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
4306                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
4307         } else {
4308
4309                 for (i = 0; i < AMD64_NREG; ++i)
4310                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4311                                 amd64_push_reg (code, i);
4312                                 pos += sizeof (gpointer);
4313                         }
4314         }
4315
4316         alloc_size -= pos;
4317
4318         if (alloc_size) {
4319                 /* See mono_emit_stack_alloc */
4320 #ifdef PLATFORM_WIN32
4321                 guint32 remaining_size = alloc_size;
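                     /* Grow the stack one page at a time, touching each page so the
                      * Windows guard page mechanism commits it. */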
4322                 while (remaining_size >= 0x1000) {
4323                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
4324                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
4325                         remaining_size -= 0x1000;
4326                 }
4327                 if (remaining_size)
4328                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
4329 #else
4330                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
4331 #endif
4332         }
4333
4334         /* compute max_offset in order to use short forward jumps */
4335         max_offset = 0;
4336         if (cfg->opt & MONO_OPT_BRANCH) {
4337                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4338                         MonoInst *ins = bb->code;
4339                         bb->max_offset = max_offset;
4340
4341                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4342                                 max_offset += 6;
4343                         /* max alignment for loops */
4344                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4345                                 max_offset += LOOP_ALIGNMENT;
4346
4347                         while (ins) {
4348                                 if (ins->opcode == OP_LABEL)
4349                                         ins->inst_c1 = max_offset;
4350                                 
4351                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
4352                                 ins = ins->next;
4353                         }
4354                 }
4355         }
4356
4357         sig = method->signature;
4358         pos = 0;
4359
4360         cinfo = get_call_info (sig, FALSE);
4361
4362         if (sig->ret->type != MONO_TYPE_VOID) {
4363                 if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
4364                         /* Save volatile arguments to the stack */
4365                         amd64_mov_membase_reg (code, cfg->ret->inst_basereg, cfg->ret->inst_offset, cinfo->ret.reg, 8);
4366                 }
4367         }
4368
4369         /* Keep this in sync with emit_load_volatile_arguments */
4370         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4371                 ArgInfo *ainfo = cinfo->args + i;
4372                 gint32 stack_offset;
4373                 MonoType *arg_type;
4374                 inst = cfg->varinfo [i];
4375
4376                 if (sig->hasthis && (i == 0))
4377                         arg_type = &mono_defaults.object_class->byval_arg;
4378                 else
4379                         arg_type = sig->params [i - sig->hasthis];
4380
4381                 stack_offset = ainfo->offset + ARGS_OFFSET;
4382
4383                 /* Save volatile arguments to the stack */
4384                 if (inst->opcode != OP_REGVAR) {
4385                         switch (ainfo->storage) {
4386                         case ArgInIReg: {
4387                                 guint32 size = 8;
4388
4389                                 /* FIXME: I1 etc */
4390                                 /*
4391                                 if (stack_offset & 0x1)
4392                                         size = 1;
4393                                 else if (stack_offset & 0x2)
4394                                         size = 2;
4395                                 else if (stack_offset & 0x4)
4396                                         size = 4;
4397                                 else
4398                                         size = 8;
4399                                 */
4400                                 amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg, size);
4401                                 break;
4402                         }
4403                         case ArgInFloatSSEReg:
4404                                 amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
4405                                 break;
4406                         case ArgInDoubleSSEReg:
4407                                 amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
4408                                 break;
4409                         default:
4410                                 break;
4411                         }
4412                 }
4413
4414                 if (inst->opcode == OP_REGVAR) {
4415                         /* Argument allocated to (non-volatile) register */
4416                         switch (ainfo->storage) {
4417                         case ArgInIReg:
4418                                 amd64_mov_reg_reg (code, inst->dreg, ainfo->reg, 8);
4419                                 break;
4420                         case ArgOnStack:
4421                                 amd64_mov_reg_membase (code, inst->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
4422                                 break;
4423                         default:
4424                                 g_assert_not_reached ();
4425                         }
4426                 }
4427         }
4428
4429         if (method->save_lmf) {
4430                 if (lmf_tls_offset != -1) {
4431                         /* Load the lmf quickly using the FS register */
4432                         x86_prefix (code, X86_FS_PREFIX);
4433                         amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
4434                 }
4435                 else {
4436                         /* 
4437                          * The call might clobber argument registers, but they are already
4438                          * saved to the stack/global regs.
4439                          */
4440
4441                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4442                                                                  (gpointer)"mono_get_lmf_addr");                
4443                 }
4444
4445                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4446
4447                 /* Save lmf_addr */
4448                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
4449                 /* Save previous_lmf */
4450                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
4451                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
4452                 /* Set new lmf */
4453                 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset);
4454                 amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
4455         }
4456
4457
4458         g_free (cinfo);
4459
4460         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4461                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4462
4463         cfg->code_len = code - cfg->native_code;
4464
4465         g_assert (cfg->code_len < cfg->code_size);
4466
4467         return code;
4468 }
4469
4470 void
4471 mono_arch_emit_epilog (MonoCompile *cfg)
4472 {
4473         MonoJumpInfo *patch_info;
4474         MonoMethod *method = cfg->method;
4475         int pos, i;
4476         guint8 *code;
4477
4478         code = cfg->native_code + cfg->code_len;
4479
4480         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4481                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4482
4483         /* the code restoring the registers must be kept in sync with CEE_JMP */
4484         pos = 0;
4485         
4486         if (method->save_lmf) {
4487                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4488
4489                 /* Restore previous lmf */
4490                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
4491                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
4492                 amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);
4493
4494                 /* Restore callee saved regs */
4495                 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
4496                         amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
4497                 }
4498                 if (cfg->used_int_regs & (1 << AMD64_R12)) {
4499                         amd64_mov_reg_membase (code, AMD64_R12, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
4500                 }
4501                 if (cfg->used_int_regs & (1 << AMD64_R13)) {
4502                         amd64_mov_reg_membase (code, AMD64_R13, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
4503                 }
4504                 if (cfg->used_int_regs & (1 << AMD64_R14)) {
4505                         amd64_mov_reg_membase (code, AMD64_R14, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
4506                 }
4507                 if (cfg->used_int_regs & (1 << AMD64_R15)) {
4508                         amd64_mov_reg_membase (code, AMD64_R15, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
4509                 }
4510         } else {
4511
4512                 for (i = 0; i < AMD64_NREG; ++i)
4513                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4514                                 pos -= sizeof (gpointer);
4515
4516                 if (pos) {
4517                         if (pos == - sizeof (gpointer)) {
4518                                 /* Only one register, so avoid lea */
4519                                 for (i = AMD64_NREG - 1; i > 0; --i)
4520                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4521                                                 amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
4522                                         }
4523                         }
4524                         else {
4525                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4526
4527                                 /* Pop registers in reverse order */
4528                                 for (i = AMD64_NREG - 1; i > 0; --i)
4529                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4530                                                 amd64_pop_reg (code, i);
4531                                         }
4532                         }
4533                 }
4534         }
4535
4536         amd64_leave (code);
4537         amd64_ret (code);
4538
4539         /* add code to raise exceptions */
4540         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4541                 switch (patch_info->type) {
4542                 case MONO_PATCH_INFO_EXC: {
4543                         guint64 offset;
4544
4545                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
4546                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
4547                         amd64_set_reg_template (code, AMD64_RDI);
4548                         /* 7 is the length of the lea */
4549                         offset = (((guint64)code + 7) - (guint64)cfg->native_code) - (guint64)patch_info->ip.i;
4550                         amd64_lea_membase (code, AMD64_RSI, AMD64_RIP, - offset);
4551                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4552                         patch_info->data.name = "mono_arch_throw_exception_by_name";
4553                         patch_info->ip.i = code - cfg->native_code;
4554                         EMIT_CALL ();
4555                         break;
4556                 }
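                /*
                 * Schematically, the sequence emitted above is (sketch):
                 *
                 *   mov  $<exc name>, %rdi     ; imm64, patched via MONO_PATCH_INFO_EXC_NAME
                 *   lea  -<offset>(%rip), %rsi ; recovers the address of the faulting instruction
                 *   call mono_arch_throw_exception_by_name
                 */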
4557                 default:
4558                         /* do nothing */
4559                         break;
4560                 }
4561         }
4562
4563         /* Handle relocations with RIP relative addressing */
4564         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4565                 gboolean remove = FALSE;
4566
4567                 switch (patch_info->type) {
4568                 case MONO_PATCH_INFO_R8: {
4569                         guint8 *pos;
4570
4571                         code = (guint8*)ALIGN_TO (code, 8);
4572                         pos = cfg->native_code + patch_info->ip.i;
4573
4574                         *(double*)code = *(double*)patch_info->data.target;
4575                         *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4576                         code += 8;
4577
4578                         remove = TRUE;
4579                         break;
4580                 }
4581                 case MONO_PATCH_INFO_R4: {
4582                         guint8 *pos;
4583
4584                         code = (guint8*)ALIGN_TO (code, 8);
4585                         pos = cfg->native_code + patch_info->ip.i;
4586
4587                         *(float*)code = *(float*)patch_info->data.target;
4588                         *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4589                         code += 4;
4590
4591                         remove = TRUE;
4592                         break;
4593                 }
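                /*
                 * Sketch of the patching above: the load emitted at POS uses RIP
                 * relative addressing, and its disp32 (at byte 3 of the 7 byte
                 * instruction) is relative to the end of that instruction, so
                 *
                 *   disp = <address of the constant> - (pos + 7)
                 *
                 * which is the (guint8*)code - pos - 7 stored above.
                 */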
4594                 default:
4595                         break;
4596                 }
4597
4598                 if (remove) {
4599                         if (patch_info == cfg->patch_info)
4600                                 cfg->patch_info = patch_info->next;
4601                         else {
4602                                 MonoJumpInfo *tmp;
4603
4604                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
4605                                         ;
4606                                 tmp->next = patch_info->next;
4607                         }
4608                 }
4609         }
4610
4611         cfg->code_len = code - cfg->native_code;
4612
4613         g_assert (cfg->code_len < cfg->code_size);
4614
4615 }
4616
4617 /*
4618  * Allow tracing to work with this interface (with an optional argument)
4619  */
4620
4621 /*
4622  * This may be needed on some archs or for debugging support.
4623  */
4624 void
4625 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
4626 {
4627         /* no stack room needed now (may be needed for FASTCALL-trace support) */
4628         *stack = 0;
4629         /* split prolog-epilog requirements? */
4630         *code = 50; /* max bytes needed: check this number */
4631 }
4632
4633 void*
4634 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4635 {
4636         guchar *code = p;
4637         CallInfo *cinfo;
4638         MonoMethodSignature *sig;
4639         MonoInst *inst;
4640         int i, n, stack_area = 0;
4641
4642         /* Keep this in sync with mono_arch_get_argument_info */
4643
4644         if (enable_arguments) {
4645                 /* Allocate a new area on the stack and save arguments there */
4646                 sig = cfg->method->signature;
4647
4648                 cinfo = get_call_info (sig, FALSE);
4649
4650                 n = sig->param_count + sig->hasthis;
4651
4652                 stack_area = ALIGN_TO (n * 8, 16);
4653
4654                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
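                /*
                 * Sketch of the resulting layout: one 8 byte slot per argument,
                 * so slot i of the trace buffer is at i*8(%rsp):
                 *
                 *   [rsp + 0]        arg 0 (this, if present)
                 *   [rsp + 8]        arg 1
                 *   ...
                 *   [rsp + (n-1)*8]  arg n-1
                 */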
4655
4656                 for (i = 0; i < n; ++i) {
4657                         ArgInfo *ainfo = cinfo->args + i;
4658                         gint32 stack_offset;
4659                         MonoType *arg_type;
4660                         inst = cfg->varinfo [i];
4661
4662                         if (sig->hasthis && (i == 0))
4663                                 arg_type = &mono_defaults.object_class->byval_arg;
4664                         else
4665                                 arg_type = sig->params [i - sig->hasthis];
4666
4667                         stack_offset = ainfo->offset + ARGS_OFFSET;
4668
4669                         switch (ainfo->storage) {
4670                         case ArgInIReg:
4671                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg, 8);                                
4672                                 break;
4673                         case ArgInFloatSSEReg:
4674                         case ArgInDoubleSSEReg:
4675                                 /* movsd saves the low 64 bits of the xmm register */
4676                                 amd64_movsd_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg);
4677                                 break;
4679                         case ArgOnStack:
4680                                 /* Copy from original stack location to the argument area */
4681                                 /* FIXME: valuetypes etc */
4682                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
4683                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
4684                                 break;
4685                         default:
4686                                 g_assert_not_reached ();
4687                         }
4688                 }
4689         }
4690
4691         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4692         amd64_set_reg_template (code, AMD64_RDI);
4693         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RSP, 8);
4694         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
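        /*
         * The tracer is thus invoked as func (<method>, <argument area>), with
         * %rdi patched to the method and %rsi pointing at the saved arguments
         * (sketch of the convention used above).
         */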
4695
4696         if (enable_arguments) {
4697                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
4698
4699                 g_free (cinfo);
4700         }
4701
4702         return code;
4703 }
4704
4705 enum {
4706         SAVE_NONE,
4707         SAVE_STRUCT,
4708         SAVE_EAX,
4709         SAVE_EAX_EDX,
4710         SAVE_XMM
4711 };
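/*
 * Sketch of the mapping used below: SAVE_EAX preserves an integer return value
 * in %rax across the tracer call, SAVE_XMM preserves a floating point return
 * in %xmm0, SAVE_STRUCT stands in for valuetype returns (see the FIXMEs), and
 * SAVE_EAX_EDX is not used on amd64.
 */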
4712
4713 void*
4714 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4715 {
4716         guchar *code = p;
4717         int save_mode = SAVE_NONE;
4718         MonoMethod *method = cfg->method;
4719         int rtype = method->signature->ret->type;
4720
4721 handle_enum:
4722         switch (rtype) {
4723         case MONO_TYPE_VOID:
4724                 /* special case string .ctor icall */
4725                 if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
4726                         save_mode = SAVE_EAX;
4727                 else
4728                         save_mode = SAVE_NONE;
4729                 break;
4730         case MONO_TYPE_I8:
4731         case MONO_TYPE_U8:
4732                 save_mode = SAVE_EAX;
4733                 break;
4734         case MONO_TYPE_R4:
4735         case MONO_TYPE_R8:
4736                 save_mode = SAVE_XMM;
4737                 break;
4738         case MONO_TYPE_VALUETYPE:
4739                 if (method->signature->ret->data.klass->enumtype) {
4740                         rtype = method->signature->ret->data.klass->enum_basetype->type;
4741                         goto handle_enum;
4742                 }
4743                 save_mode = SAVE_STRUCT;
4744                 break;
4745         default:
4746                 save_mode = SAVE_EAX;
4747                 break;
4748         }
4749
4750         /* Save the result and copy it into the proper argument register */
4751         switch (save_mode) {
4752         case SAVE_EAX:
4753                 amd64_push_reg (code, AMD64_RAX);
4754                 /* Align stack */
4755                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4756                 if (enable_arguments)
4757                         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RAX, 8);
4758                 break;
4759         case SAVE_STRUCT:
4760                 /* FIXME: */
4761                 if (enable_arguments)
4762                         amd64_mov_reg_imm (code, AMD64_RSI, 0);
4763                 break;
4764         case SAVE_XMM:
4765                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4766                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
4767                 /* Align stack */
4768                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4769                 /* 
4770                  * The result is already in the proper argument register so no copying
4771                  * needed.
4772                  */
4773                 break;
4774         case SAVE_NONE:
4775                 break;
4776         default:
4777                 g_assert_not_reached ();
4778         }
4779
4780         /* Set %al: varargs callees receive the number of vector registers used in %al */
4781         if (save_mode == SAVE_XMM)
4782                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
4783         else
4784                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
4785
4786         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
4787         amd64_set_reg_template (code, AMD64_RDI);
4788         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4789
4790         /* Restore result */
4791         switch (save_mode) {
4792         case SAVE_EAX:
4793                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4794                 amd64_pop_reg (code, AMD64_RAX);
4795                 break;
4796         case SAVE_STRUCT:
4797                 /* FIXME: */
4798                 break;
4799         case SAVE_XMM:
4800                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4801                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
4802                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4803                 break;
4804         case SAVE_NONE:
4805                 break;
4806         default:
4807                 g_assert_not_reached ();
4808         }
4809
4810         return code;
4811 }
4812
4813 int
4814 mono_arch_max_epilog_size (MonoCompile *cfg)
4815 {
4816         int max_epilog_size = 16;
4817         MonoJumpInfo *patch_info;
4818         
4819         if (cfg->method->save_lmf)
4820                 max_epilog_size += 256;
4821         
4822         if (mono_jit_trace_calls != NULL)
4823                 max_epilog_size += 50;
4824
4825         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4826                 max_epilog_size += 50;
4827
4828         max_epilog_size += (AMD64_NREG * 2);
4829
4830         /* 
4831          * make sure we have enough space for exceptions
4832          */
4833         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4834                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4835                         max_epilog_size += 40;
4836                 if (patch_info->type == MONO_PATCH_INFO_R8)
4837                         max_epilog_size += 8 + 7; /* sizeof (double) + alignment */
4838                 if (patch_info->type == MONO_PATCH_INFO_R4)
4839                         max_epilog_size += 4 + 7; /* sizeof (float) + alignment */
4840         }
4841
4842         return max_epilog_size;
4843 }
4844
4845 void
4846 mono_arch_flush_icache (guint8 *code, gint size)
4847 {
4848         /* Not needed: amd64 keeps the instruction caches coherent */
4849 }
4850
4851 void
4852 mono_arch_flush_register_windows (void)
4853 {
4854 }
4855
4856 gboolean 
4857 mono_arch_is_inst_imm (gint64 imm)
4858 {
4859         return amd64_is_imm32 (imm);
4860 }
4861
4862 #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
4863
4864 static int reg_to_ucontext_reg [] = {
4865         REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
4866         REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
4867         REG_RIP
4868 };
4869
4870 /*
4871  * Determine whether the trap whose context is in SIGCTX was caused by integer
4872  * overflow: only the overflow case of idiv (most negative value / -1) has a divisor of -1.
4873  */
4874 gboolean
4875 mono_arch_is_int_overflow (void *sigctx)
4876 {
4877         ucontext_t *ctx = (ucontext_t*)sigctx;
4878         guint8* rip;
4879         int reg;
4880
4881         rip = (guint8*)ctx->uc_mcontext.gregs [REG_RIP];
4882
4883         if (IS_REX (rip [0])) {
4884                 reg = amd64_rex_b (rip [0]); /* REX.B extends the ModRM r/m field */
4885                 rip ++;
4886         }
4887         else
4888                 reg = 0;
4889
4890         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
4891                 /* idiv REG */
4892                 reg += x86_modrm_rm (rip [1]);
4893
4894                 if (ctx->uc_mcontext.gregs [reg_to_ucontext_reg [reg]] == -1)
4895                         return TRUE;
4896         }
4897
4898         return FALSE;
4899 }
4900
4901 gpointer*
4902 mono_amd64_get_vcall_slot_addr (guint8* code, guint64 *regs)
4903 {
4904         guint32 reg;
4905         guint32 disp;
4906         guint8 rex = 0;
4907
4908         /* go to the start of the call instruction
4909          *
4910          * address_byte = (m << 6) | (o << 3) | reg
4911          * call opcode: 0xff address_byte displacement
4912          * 0xff m=1,o=2 imm8
4913          * 0xff m=2,o=2 imm32
4914          */
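        /*
         * Worked example (sketch): call *0x30(%rax) encodes as ff 50 30, where
         * the address_byte 0x50 = (1 << 6) | (2 << 3) | 0 gives m=1 (a disp8
         * follows), o=2 (the call opcode extension) and reg=0 (%rax), followed
         * by the 8 bit displacement 0x30.
         */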
4915         code -= 6;
4916
4917         if (IS_REX (code [3]) && (code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x3)) {
4918                 /* call *%reg */
4919                 return NULL;
4920         }
4921         else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2)) {
4922                 /* call *[reg+disp32] */
4923                 reg = amd64_modrm_rm (code [1]);
4924                 disp = *(guint32*)(code + 2);
4925                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4926         }
4927         else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1)) {
4928                 /* call *[reg+disp8] */
4929                 reg = amd64_modrm_rm (code [4]);
4930                 disp = *(guint8*)(code + 5);
4931                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4932         }
4933         else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x0)) {
4934                 /*
4935                  * This is an interface call; check that the cases above can't catch it earlier:
4936                  *   8b 40 30   mov    0x30(%rax),%eax
4937                  *   ff 10      call   *(%rax)
4938                  */
4939                 reg = amd64_modrm_rm (code [5]);
4940                 disp = 0;
4941         }
4942         else
4943                 g_assert_not_reached ();
4944
4945         reg += amd64_rex_b (rex);
4946
4947         /* FIXME: the REX prefix of the call instruction is never decoded, so rex is always 0 here */
4948         return (gpointer)((regs [reg]) + disp);
4949 }
4950
4951 /*
4952  * Support for fast access to the thread-local lmf structure using the FS
4953  * segment register on NPTL + kernel 2.6.x.
4954  */
4955
4956 static gboolean tls_offset_inited = FALSE;
4957
4958 /* The body of METHOD should consist of a single 'return <tls var>;' statement */
4959 static int 
4960 read_tls_offset_from_method (void* method)
4961 {
4962         guint8 *code = (guint8*)method;
4963
4964         /* 
4965          * Determine the offset of the TLS variable returned by METHOD (e.g.
4966          * mono_get_lmf_addr) by disassembling its code.
4967          */
4968         /* This is generated by gcc 3.3.2 */
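        /*
         * i.e. the expected disassembly is (sketch):
         *   55                            push %rbp
         *   48 89 e5                      mov  %rsp,%rbp
         *   64 48 8b 04 25 00 00 00 00    mov  %fs:0x0,%rax
         *   48 8b 80 <offset32>           mov  <offset>(%rax),%rax
         * so the 32 bit TLS offset starts at byte 16.
         */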
4969         if ((code [0] == 0x55) && (code [1] == 0x48) && (code [2] == 0x89) &&
4970                 (code [3] == 0xe5) && (code [4] == 0x64) && (code [5] == 0x48) &&
4971                 (code [6] == 0x8b) && (code [7] == 0x04) && (code [8] == 0x25) &&
4972                 (code [9] == 0x00) && (code [10] == 0x00) && (code [11] == 0x00) &&
4973                 (code [12] == 0x0) && (code [13] == 0x48) && (code [14] == 0x8b) &&
4974                 (code [15] == 0x80)) {
4975                 return *(gint32*)&(code [16]);
4976         } else if ((code [0] == 0x55) && (code [1] == 0x48) && (code [2] == 0x89) &&
4977                    (code [3] == 0xe5) && (code [4] == 0x64) && (code [5] == 0x48) &&
4978                    (code [6] == 0x8b) && (code [7] == 0x04) && (code [8] == 0x25)) {
4979                 /* This is generated by gcc-3.4.1: mov %fs:<offset32>,%rax, so
4980                  * the 32 bit TLS offset starts at byte 9 */
4981                 return *(gint32*)&(code [9]);
4982         }
4983
4984         return -1;
4985 }
4986
4987 void
4988 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4989 {
4990 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4991         pthread_t self = pthread_self();
4992         pthread_attr_t attr;
4993         void *staddr = NULL;
4994         size_t stsize = 0;
4995         struct sigaltstack sa;
4996 #endif
4997
4998         if (!tls_offset_inited) {
4999                 tls_offset_inited = TRUE;
5000
5001                 lmf_tls_offset = read_tls_offset_from_method (mono_get_lmf_addr);
5002                 appdomain_tls_offset = read_tls_offset_from_method (mono_domain_get);
5003                 //thread_tls_offset = read_tls_offset_from_method (mono_thread_current);
5004         }               
5005
5006 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5007
5008         /* Determine stack boundaries */
5009         if (!mono_running_on_valgrind ()) {
5010 #ifdef HAVE_PTHREAD_GETATTR_NP
5011                 pthread_getattr_np( self, &attr );
5012 #else
5013 #ifdef HAVE_PTHREAD_ATTR_GET_NP
5014                 pthread_attr_get_np( self, &attr );
5015 #elif defined(sun)
5016                 pthread_attr_init( &attr );
5017                 pthread_attr_getstacksize( &attr, &stsize );
5018 #else
5019 #error "Not implemented"
5020 #endif
5021 #endif
5022 #ifndef sun
5023                 pthread_attr_getstack( &attr, &staddr, &stsize );
5024 #endif
5025         }
5026
5027         /* 
5028          * staddr seems to be wrong for the main thread, so we keep the value in
5029          * tls->end_of_stack
5030          */
5031         tls->stack_size = stsize;
5032
5033         /* Setup an alternate signal stack */
5034         tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
5035         tls->signal_stack_size = SIGNAL_STACK_SIZE;
5036
5037         sa.ss_sp = tls->signal_stack;
5038         sa.ss_size = SIGNAL_STACK_SIZE;
5039         sa.ss_flags = SS_ONSTACK;
5040         sigaltstack (&sa, NULL);
5041 #endif
5042 }
5043
5044 void
5045 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5046 {
5047 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5048         struct sigaltstack sa;
5049
5050         sa.ss_sp = tls->signal_stack;
5051         sa.ss_size = SIGNAL_STACK_SIZE;
5052         sa.ss_flags = SS_DISABLE;
5053         sigaltstack  (&sa, NULL);
5054
5055         if (tls->signal_stack)
5056                 g_free (tls->signal_stack);
5057 #endif
5058 }
5059
5060 void
5061 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
5062 {
5063         int out_reg = param_regs [0];
5064
5065         /* FIXME: RDI and RSI might get clobbered */
5066
5067         if (vt_reg != -1) {
5068                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
5069                 MonoInst *vtarg;
5070
5071                 if (cinfo->ret.storage == ArgValuetypeInReg) {
5072                         /*
5073                          * The valuetype is in RAX:RDX after the call, need to be copied to
5074                          * the stack. Push the address here, so the call instruction can
5075                          * access it.
5076                          */
5077                         MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
5078                         vtarg->sreg1 = vt_reg;
5079                         mono_bblock_add_inst (cfg->cbb, vtarg);
5080
5081                         /* Align stack */
5082                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
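                        /*
                         * Note: OP_X86_PUSH and the X86_ESP register name are
                         * shared with the x86 backend; on amd64 they act on
                         * the full %rsp.
                         */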
5083                 }
5084                 else {
5085                         MONO_INST_NEW (cfg, vtarg, OP_SETREG);
5086                         vtarg->sreg1 = vt_reg;
5087                         vtarg->dreg = out_reg;
5088                         out_reg = param_regs [1];
5089                         mono_bblock_add_inst (cfg->cbb, vtarg);
5090                 }
5091
5092                 g_free (cinfo);
5093         }
5094
5095         /* add the this argument */
5096         if (this_reg != -1) {
5097                 MonoInst *this;
5098                 MONO_INST_NEW (cfg, this, OP_SETREG);
5099                 this->type = this_type;
5100                 this->sreg1 = this_reg;
5101                 this->dreg = out_reg;
5102                 mono_bblock_add_inst (cfg->cbb, this);
5103         }
5104 }
5105
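/*
 * Map recognized System.Math methods onto inline opcodes, so that e.g. a call
 * to Math.Sqrt compiles down to a single OP_SQRT instead of a call.
 */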
5106 gint
5107 mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5108 {
5109         if (cmethod->klass == mono_defaults.math_class) {
5110                 if (strcmp (cmethod->name, "Sin") == 0)
5111                         return OP_SIN;
5112                 else if (strcmp (cmethod->name, "Cos") == 0)
5113                         return OP_COS;
5114                 else if (strcmp (cmethod->name, "Tan") == 0)
5115                         return OP_TAN;
5116                 else if (strcmp (cmethod->name, "Atan") == 0)
5117                         return OP_ATAN;
5118                 else if (strcmp (cmethod->name, "Sqrt") == 0)
5119                         return OP_SQRT;
5120                 else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8)
5121                         return OP_ABS;
5122 #if 0
5123                 /* OP_FREM is not IEEE compatible */
5124                 else if (strcmp (cmethod->name, "IEEERemainder") == 0)
5125                         return OP_FREM;
5126 #endif
5127                 else
5128                         return -1;
5129         }
5130
5131         return -1;
5133 }
5134
5135
5136 gboolean
5137 mono_arch_print_tree (MonoInst *tree, int arity)
5138 {
5139         return 0;
5140 }
5141
5142 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5143 {
5144         MonoInst* ins;
5145         
5146         if (appdomain_tls_offset == -1)
5147                 return NULL;
5148         
5149         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
5150         ins->inst_offset = appdomain_tls_offset;
5151         return ins;
5152 }
5153
5154 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5155 {
5156         MonoInst* ins;
5157         
5158         if (thread_tls_offset == -1)
5159                 return NULL;
5160         
5161         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
5162         ins->inst_offset = thread_tls_offset;
5163         return ins;
5164 }