/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-amd64.h"
#include "inssel.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#define ALIGN_TO(val,align) ((((guint64)(val)) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)(val)) >> 32) == 0)

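/*
 * For example, ALIGN_TO (13, 8) == 16 and ALIGN_TO (16, 8) == 16. IS_IMM32
 * holds when a value fits into a zero-extended 32 bit immediate, so
 * IS_IMM32 (0xffffffffULL) is TRUE while IS_IMM32 (0x100000000ULL) is FALSE.
 */
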
#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

#define SIGNAL_STACK_SIZE (64 * 1024)

#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * FIXME:
 * - Use xmm registers instead of the x87 stack
 * - Allocate arguments to global registers
 * - implement emulated opcodes
 * - (all archs) do not store trampoline addresses in method->info since they
 *   are domain specific.
 */

#define NOT_IMPLEMENTED g_assert_not_reached ()
65
66 const char*
67 mono_arch_regname (int reg) {
68         switch (reg) {
69         case AMD64_RAX: return "%rax";
70         case AMD64_RBX: return "%rbx";
71         case AMD64_RCX: return "%rcx";
72         case AMD64_RDX: return "%rdx";
73         case AMD64_RSP: return "%rsp";  
74         case AMD64_RBP: return "%rbp";
75         case AMD64_RDI: return "%rdi";
76         case AMD64_RSI: return "%rsi";
77         case AMD64_R8: return "%r8";
78         case AMD64_R9: return "%r9";
79         case AMD64_R10: return "%r10";
80         case AMD64_R11: return "%r11";
81         case AMD64_R12: return "%r12";
82         case AMD64_R13: return "%r13";
83         case AMD64_R14: return "%r14";
84         case AMD64_R15: return "%r15";
85         }
86         return "unknown";
87 }
88
89 static inline void 
90 amd64_patch (unsigned char* code, gpointer target)
91 {
92         /* Skip REX */
93         if ((code [0] >= 0x40) && (code [0] <= 0x4f))
94                 code += 1;
95
96         if (code [0] == 0xbb) {
97                 /* amd64_set_reg_template */
98                 *(guint64*)(code + 1) = (guint64)target;
99         }
100         else
101                 x86_patch (code, (unsigned char*)target);
102 }
103
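/*
 * Example: amd64_set_reg_template for %r11 emits "movabs $imm64, %r11",
 * encoded as 0x49 0xbb followed by the 8 byte immediate. After skipping the
 * REX prefix (0x49), code [0] is 0xbb and the immediate starts at code + 1,
 * which is where the target address is patched in above.
 */
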
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;

#define DEBUG(a) if (cfg->verbose_level > 1) a

#define NEW_ICONST(cfg,dest,val) do {	\
		(dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));	\
		(dest)->opcode = OP_ICONST;	\
		(dest)->inst_c0 = (val);	\
		(dest)->type = STACK_I4;	\
	} while (0)

#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

#define FLOAT_PARAM_REGS 8

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}

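/*
 * Example of the resulting assignment: for a signature like
 * void f (int a, double b, int c), the integer and float counters advance
 * independently, so a goes in %rdi, b in %xmm0 and c in %rsi. Only the
 * seventh integer (or ninth float) argument spills to the stack.
 */
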
typedef enum ArgumentClass {
	ARG_CLASS_NO_CLASS,
	ARG_CLASS_MEMORY,
	ARG_CLASS_INTEGER,
	ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
	ArgumentClass class2;

	switch (type->type) {
	case MONO_TYPE_BOOLEAN:
	case MONO_TYPE_CHAR:
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_STRING:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		class2 = ARG_CLASS_INTEGER;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		class2 = ARG_CLASS_SSE;
		break;

	case MONO_TYPE_TYPEDBYREF:
		g_assert_not_reached ();

	case MONO_TYPE_VALUETYPE:
		if (type->data.klass->enumtype)
			class2 = ARG_CLASS_INTEGER;
		else {
			MonoMarshalType *info = mono_marshal_load_type_info (type->data.klass);
			int i;

			for (i = 0; i < info->num_fields; ++i) {
				class2 = class1;
				class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
			}
		}
		break;
	}

	/* Merge */
	if (class1 == class2)
		;
	else if (class1 == ARG_CLASS_NO_CLASS)
		class1 = class2;
	else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
		class1 = ARG_CLASS_MEMORY;
	else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
		class1 = ARG_CLASS_INTEGER;
	else
		class1 = ARG_CLASS_SSE;

	return class1;
}

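/*
 * Example: for struct { int a; float b; } both fields live in the first
 * eightbyte, so the merge sees INTEGER (from a) and SSE (from b); since one
 * operand is INTEGER, the combined class is INTEGER and the struct travels
 * in a general purpose register.
 */
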
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size, quad, nquads, i;
	ArgumentClass args [2];
	MonoMarshalType *info;

	if (sig->pinvoke)
		size = mono_type_native_stack_size (&type->data.klass->byval_arg, NULL);
	else
		size = mono_type_stack_size (&type->data.klass->byval_arg, NULL);

	if (!sig->pinvoke || (size == 0) || (size > 16)) {
		/* Always pass in memory */
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (size, 8);
		ainfo->storage = ArgOnStack;

		return;
	}

	/* FIXME: Handle structs smaller than 8 bytes */
	//if ((size % 8) != 0)
	//	NOT_IMPLEMENTED;

	if (size > 8)
		nquads = 2;
	else
		nquads = 1;

	/*
	 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
	 * The X87 and SSEUP stuff is left out since there are no such types in
	 * the CLR.
	 */
	info = mono_marshal_load_type_info (type->data.klass);
	g_assert (info);
	if (info->native_size > 16) {
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (info->native_size, 8);
		ainfo->storage = ArgOnStack;

		return;
	}

	for (quad = 0; quad < nquads; ++quad) {
		int size, align;
		ArgumentClass class1;

		class1 = ARG_CLASS_NO_CLASS;
		for (i = 0; i < info->num_fields; ++i) {
			size = mono_marshal_type_size (info->fields [i].field->type,
						       info->fields [i].mspec,
						       &align, TRUE, type->data.klass->unicode);
			if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
				/* Unaligned field */
				NOT_IMPLEMENTED;
			}

			/* Skip fields in other quad */
			if ((quad == 0) && (info->fields [i].offset >= 8))
				continue;
			if ((quad == 1) && (info->fields [i].offset < 8))
				continue;

			class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
		}
		g_assert (class1 != ARG_CLASS_NO_CLASS);
		args [quad] = class1;
	}

	/* Post merger cleanup */
	if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
		args [0] = args [1] = ARG_CLASS_MEMORY;

	/* Allocate registers */
	{
		int orig_gr = *gr;
		int orig_fr = *fr;

		ainfo->storage = ArgValuetypeInReg;
		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
		for (quad = 0; quad < nquads; ++quad) {
			switch (args [quad]) {
			case ARG_CLASS_INTEGER:
				if (*gr >= PARAM_REGS)
					args [quad] = ARG_CLASS_MEMORY;
				else {
					ainfo->pair_storage [quad] = ArgInIReg;
					if (is_return)
						ainfo->pair_regs [quad] = return_regs [*gr];
					else
						ainfo->pair_regs [quad] = param_regs [*gr];
					(*gr) ++;
				}
				break;
			case ARG_CLASS_SSE:
				if (*fr >= FLOAT_PARAM_REGS)
					args [quad] = ARG_CLASS_MEMORY;
				else {
					ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
					ainfo->pair_regs [quad] = *fr;
					(*fr) ++;
				}
				break;
			case ARG_CLASS_MEMORY:
				break;
			default:
				g_assert_not_reached ();
			}
		}

		if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
			/* Revert possible register assignments */
			*gr = orig_gr;
			*fr = orig_fr;

			ainfo->offset = *stack_size;
			*stack_size += ALIGN_TO (info->native_size, 8);
			ainfo->storage = ArgOnStack;
		}
	}
}

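/*
 * Worked examples of the classification above: struct { double x; double y; }
 * (16 bytes) classifies as SSE+SSE and is passed in two SSE registers, while
 * struct { gint64 a; double b; } classifies as INTEGER+SSE and uses one
 * general purpose and one SSE register. If either eightbyte ends up MEMORY
 * (e.g. the registers ran out), the whole struct is passed on the stack.
 */
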
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr, simpletype;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		simpletype = sig->ret->type;
enum_retvalue:
		switch (simpletype) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = AMD64_RAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = AMD64_RAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgInFloatSSEReg;
			cinfo->ret.reg = AMD64_XMM0;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgInDoubleSSEReg;
			cinfo->ret.reg = AMD64_XMM0;
			break;
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			if (sig->ret->data.klass->enumtype) {
				simpletype = sig->ret->data.klass->enum_basetype->type;
				goto enum_retvalue;
			}

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			break;
		case MONO_TYPE_VOID:
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		simpletype = sig->params [i]->type;
	enum_calc_size:
		switch (simpletype) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			if (sig->params [i]->data.klass->enumtype) {
				simpletype = sig->params [i]->data.klass->enum_basetype->type;
				goto enum_calc_size;
			}

			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (stack_size & 0x8) {
		/* The AMD64 ABI requires each stack frame to be 16 byte aligned */
		cinfo->need_stack_align = TRUE;
		stack_size += 8;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}

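/*
 * Alignment example: with all six integer registers taken, an eleventh
 * pointer-sized argument leaves stack_size at 40 (5 * 8), so bit 3 is set,
 * need_stack_align becomes TRUE and 8 bytes of padding bring the outgoing
 * area to 48, keeping %rsp 16 byte aligned at the call site.
 */
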
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k;

	/* The arguments are saved to a stack area in mono_arch_instrument_prolog */
	if (csig->hasthis) {
		arg_info [0].offset = 0;
	}

	for (k = 0; k < param_count; k++) {
		arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
		/* FIXME: */
		arg_info [k + 1].size = 0;
	}

	/* FIXME: */
	return 0;
}

static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	/* Not implemented yet: returning 0 makes callers skip the feature checks */
	return 0;
}

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));

	mono_amd64_exceptions_init ();
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	/* FIXME: AMD64 */

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
	}
	return opts;
}

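/*
 * In CPUID leaf 1, EDX bit 15 signals CMOV support and bit 0 signals an
 * on-chip x87 FPU, which together gate FCMOV. In practice every AMD64
 * processor sets both, so once cpuid () is implemented the checks above
 * should always enable MONO_OPT_CMOV and MONO_OPT_FCMOV on this architecture.
 */
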
static gboolean
is_regsize_var (MonoType *t) {
	if (t->byref)
		return TRUE;
	switch (t->type) {
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
		return TRUE;
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_STRING:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
		return TRUE;
	case MONO_TYPE_VALUETYPE:
		if (t->data.klass->enumtype)
			return is_regsize_var (t->data.klass->enum_basetype);
		return FALSE;
	}
	return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* We use the callee saved registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
	regs = g_list_prepend (regs, (gpointer)AMD64_R12);
	regs = g_list_prepend (regs, (gpointer)AMD64_R13);
	regs = g_list_prepend (regs, (gpointer)AMD64_R14);
	regs = g_list_prepend (regs, (gpointer)AMD64_R15);

	return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		/* subtract 1 for the invisible store in the prolog */
		return (ins->opcode == OP_ARG) ? 0 : 1;
	else
		/* push+pop */
		return (ins->opcode == OP_ARG) ? 1 : 2;
}

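/*
 * Reading the costs above: for a local in a method without an LMF, global
 * allocation costs the push+pop pair that saves and restores the callee
 * saved register (2 memory references); for an argument the push roughly
 * replaces the store the prolog would do anyway, leaving 1 extra reference.
 */
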
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	int i, offset, size, align, curinst;
	CallInfo *cinfo;

	header = ((MonoMethodNormal *)m->method)->header;

	sig = m->method->signature;

	cinfo = get_call_info (sig, FALSE);

	/*
	 * We use the ABI calling conventions for managed code as well.
	 * Exception: valuetypes are never passed or returned in registers.
	 */

	/* Locals are allocated backwards from %fp */
	m->frame_reg = AMD64_RBP;
	offset = 0;

	/* Reserve space for callee saved registers */
	for (i = 0; i < AMD64_NREG; ++i)
		if (AMD64_IS_CALLEE_SAVED_REG (i) && (m->used_int_regs & (1 << i))) {
			offset += sizeof (gpointer);
		}

	if (m->method->save_lmf) {
		/* Reserve stack space for saving LMF + argument regs */
		offset += sizeof (MonoLMF);
		if (lmf_tls_offset == -1)
			/* Need to save argument regs too */
			offset += (AMD64_NREG * 8) + (8 * 8);
		m->arch.lmf_offset = offset;
	}

	if (sig->ret->type != MONO_TYPE_VOID) {
		switch (cinfo->ret.storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			if (((sig->ret->type == MONO_TYPE_VALUETYPE) && !sig->ret->data.klass->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
				/* The register is volatile */
				m->ret->opcode = OP_REGOFFSET;
				m->ret->inst_basereg = AMD64_RBP;
				offset += 8;
				m->ret->inst_offset = - offset;
			}
			else {
				m->ret->opcode = OP_REGVAR;
				m->ret->inst_c0 = cinfo->ret.reg;
			}
			break;
		default:
			g_assert_not_reached ();
		}
		m->ret->dreg = m->ret->inst_c0;
	}

	curinst = m->locals_start;
	for (i = curinst; i < m->num_varinfo; ++i) {
		inst = m->varinfo [i];

		if (inst->opcode == OP_REGVAR) {
			//g_print ("allocating local %d to %s\n", i, mono_arch_regname (inst->dreg));
			continue;
		}

		/* inst->unused indicates native sized value types, this is used by the
		 * pinvoke wrappers when they call functions returning structures */
		if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
			size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
		else
			size = mono_type_stack_size (inst->inst_vtype, &align);

		/*
		 * variables are accessed as negative offsets from %fp, so increase
		 * the offset before assigning it to a variable
		 */
		offset += size;

		offset += align - 1;
		offset &= ~(align - 1);
		inst->opcode = OP_REGOFFSET;
		inst->inst_basereg = AMD64_RBP;
		inst->inst_offset = - offset;

		//g_print ("allocating local %d to [%s - %d]\n", i, mono_arch_regname (inst->inst_basereg), - inst->inst_offset);
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		m->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = m->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			ArgInfo *ainfo = &cinfo->args [i];
			gboolean inreg = TRUE;
			MonoType *arg_type;

			if (sig->hasthis && (i == 0))
				arg_type = &mono_defaults.object_class->byval_arg;
			else
				arg_type = sig->params [i - sig->hasthis];

			/* FIXME: Allocate volatile arguments to registers */
			if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
				inreg = FALSE;

			/*
			 * Under AMD64, all registers used to pass arguments to functions
			 * are volatile across calls.
			 * FIXME: Optimize this.
			 */
			if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg))
				inreg = FALSE;

			inst->opcode = OP_REGOFFSET;

			switch (ainfo->storage) {
			case ArgInIReg:
			case ArgInFloatSSEReg:
			case ArgInDoubleSSEReg:
				inst->opcode = OP_REGVAR;
				inst->dreg = ainfo->reg;
				break;
			case ArgOnStack:
				inst->opcode = OP_REGOFFSET;
				inst->inst_basereg = AMD64_RBP;
				inst->inst_offset = ainfo->offset + ARGS_OFFSET;
				break;
			default:
				NOT_IMPLEMENTED;
			}

			if (!inreg && (ainfo->storage != ArgOnStack)) {
				inst->opcode = OP_REGOFFSET;
				inst->inst_basereg = AMD64_RBP;
				/* These arguments are saved to the stack in the prolog */
				offset += 8;
				inst->inst_offset = - offset;
			}
		}
	}

	m->stack_offset = offset;

	g_free (cinfo);
}

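/*
 * Sketch of the resulting frame layout, growing down from %rbp: first the
 * callee saved registers named in used_int_regs, then (if save_lmf is set)
 * the MonoLMF area, then locals at negative offsets, while stack arguments
 * stay at positive offsets starting at ARGS_OFFSET (16) above %rbp, i.e.
 * past the saved %rbp and the return address.
 */
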
static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage storage, int reg, MonoInst *tree)
{
	switch (storage) {
	case ArgInIReg:
		arg->opcode = OP_OUTARG_REG;
		arg->inst_left = tree;
		arg->inst_right = (MonoInst*)call;
		arg->unused = reg;
		call->used_iregs |= 1 << reg;
		break;
	case ArgInFloatSSEReg:
		/* FIXME: These are volatile as well */
		arg->opcode = OP_AMD64_OUTARG_XMMREG_R4;
		arg->inst_left = tree;
		arg->unused = reg;
		break;
	case ArgInDoubleSSEReg:
		arg->opcode = OP_AMD64_OUTARG_XMMREG_R8;
		arg->inst_left = tree;
		arg->unused = reg;
		break;
	default:
		g_assert_not_reached ();
	}
}

/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode,
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

static int
arg_storage_to_ldind (ArgStorage storage)
{
	switch (storage) {
	case ArgInIReg:
		return CEE_LDIND_I;
	case ArgInDoubleSSEReg:
		return CEE_LDIND_R8;
	case ArgInFloatSSEReg:
		return CEE_LDIND_R4;
	default:
		g_assert_not_reached ();
	}

	return -1;
}

/*
 * Take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n, stack_size;
	CallInfo *cinfo;
	ArgInfo *ainfo;

	stack_size = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (sig, sig->pinvoke);

	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			MonoMethodSignature *tmp_sig;

			/* Emit the signature cookie just before the implicit arguments */
			MonoInst *sig_arg;
			/* FIXME: Add support for signature tokens to AOT */
			cfg->disable_aot = TRUE;

			g_assert (cinfo->sig_cookie.storage == ArgOnStack);

			/*
			 * mono_ArgIterator_Setup assumes the signature cookie is
			 * passed first and all the arguments which were before it are
			 * passed on the stack after the signature. So compensate by
			 * passing a different signature.
			 */
			tmp_sig = mono_metadata_signature_dup (call->signature);
			tmp_sig->param_count -= call->signature->sentinelpos;
			tmp_sig->sentinelpos = 0;
			memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

			MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
			sig_arg->inst_p0 = tmp_sig;

			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			arg->inst_left = sig_arg;
			arg->type = STACK_PTR;

			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
		}

		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
		} else {
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;

			if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT (sig->params [i - sig->hasthis]))) {
				gint align;
				guint32 size;

				if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
					size = sizeof (MonoTypedRef);
					align = sizeof (gpointer);
				}
				else if (sig->pinvoke)
					size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
				else
					size = mono_type_stack_size (&in->klass->byval_arg, &align);
				if (ainfo->storage == ArgValuetypeInReg) {
					if (ainfo->pair_storage [1] == ArgNone) {
						MonoInst *load;

						/* Simpler case */
						MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
						load->inst_left = in;

						add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);
					}
					else {
						/* Trees can't be shared so make a copy */
						MonoInst *vtaddr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
						MonoInst *load, *load2, *offset_ins;

						/* Reg1 */
						MONO_INST_NEW (cfg, load, CEE_LDIND_I);
						load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

						NEW_ICONST (cfg, offset_ins, 0);
						MONO_INST_NEW (cfg, load2, CEE_ADD);
						load2->inst_left = load;
						load2->inst_right = offset_ins;

						MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
						load->inst_left = load2;

						add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);

						/* Reg2 */
						MONO_INST_NEW (cfg, load, CEE_LDIND_I);
						load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

						NEW_ICONST (cfg, offset_ins, 8);
						MONO_INST_NEW (cfg, load2, CEE_ADD);
						load2->inst_left = load;
						load2->inst_right = offset_ins;

						MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [1]));
						load->inst_left = load2;

						MONO_INST_NEW (cfg, arg, OP_OUTARG);
						arg->cil_code = in->cil_code;
						arg->type = in->type;
						/* prepend, so they get reversed */
						arg->next = call->out_args;
						call->out_args = arg;

						add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1], load);

						/* Prepend a copy inst */
						MONO_INST_NEW (cfg, arg, CEE_STIND_I);
						arg->cil_code = in->cil_code;
						arg->inst_left = vtaddr;
						arg->inst_right = in;
						arg->type = in->type;

						/* prepend, so they get reversed */
						arg->next = call->out_args;
						call->out_args = arg;
					}
				}
				else {
					arg->opcode = OP_OUTARG_VT;
					arg->klass = in->klass;
					arg->unused = sig->pinvoke;
					arg->inst_imm = size;
				}
			}
			else {
				switch (ainfo->storage) {
				case ArgInIReg:
					add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
					break;
				case ArgInFloatSSEReg:
				case ArgInDoubleSSEReg:
					add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
					break;
				case ArgOnStack:
					arg->opcode = OP_OUTARG;
					if (!sig->params [i - sig->hasthis]->byref) {
						if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4)
							arg->opcode = OP_OUTARG_R4;
						else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8)
							arg->opcode = OP_OUTARG_R8;
					}
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
	}

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
		/* prepend, so they get reversed */
		arg->next = call->out_args;
		call->out_args = arg;
	}

	call->stack_usage = cinfo->stack_usage;
	cfg->param_area = MAX (cfg->param_area, call->stack_usage);
	cfg->flags |= MONO_CFG_HAS_CALLS;

	g_free (cinfo);

	return call;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}

/* Emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
	do {                                                        \
		mono_add_patch_info (cfg, code - cfg->native_code,   \
				    MONO_PATCH_INFO_EXC, exc_name);  \
		x86_branch32 (code, cond, 0, signed);               \
	} while (0)

#define EMIT_FPCOMPARE(code) do { \
	amd64_fcompp (code); \
	amd64_fnstsw (code); \
} while (0)

/*
 * Emitting a call and patching it later is expensive on amd64, so try to
 * determine the patch target immediately, and emit more efficient code if
 * possible.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	/* FIXME: */
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	amd64_set_reg_template (code, GP_SCRATCH_REG);
	amd64_call_reg (code, GP_SCRATCH_REG);

	return code;
}

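/*
 * The sequence emitted above is "movabs $target, %r11; call *%r11", which
 * can reach a target anywhere in the 64 bit address space, unlike a rel32
 * call; amd64_patch () recognizes the 0xbb template byte and rewrites the
 * 8 byte immediate once the real target is known.
 */
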
#define EMIT_CALL() do { \
	amd64_set_reg_template (code, GP_SCRATCH_REG); \
	amd64_call_reg (code, GP_SCRATCH_REG); \
} while (0)

/* FIXME: Add more instructions */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM))

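/*
 * Example: "xor %reg, %reg" is shorter than "mov $0, %reg" but clobbers the
 * condition flags, so the ICONST 0 -> XOR rewrite below is only safe when
 * INST_IGNORES_CFLAGS holds for the following instruction.
 */
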
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't always do it */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM:
			/* remove unnecessary multiplication by 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/*
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 the second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/*
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg)
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/*
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1379                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1380                                         ins->inst_offset == last_ins->inst_offset) {
1381                                 if (ins->dreg == last_ins->sreg1) {
1382                                         last_ins->next = ins->next;                             
1383                                         ins = ins->next;                                
1384                                         continue;
1385                                 } else {
1386                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1387                                         ins->opcode = OP_MOVE;
1388                                         ins->sreg1 = last_ins->sreg1;
1389                                 }
1390                         }
1391                         break;
1392                 case CEE_CONV_I4:
1393                 case CEE_CONV_U4:
1394                 case OP_MOVE:
1395                         /*
1396                          * Removes:
1397                          *
1398                          * OP_MOVE reg, reg 
1399                          */
1400                         if (ins->dreg == ins->sreg1) {
1401                                 if (last_ins)
1402                                         last_ins->next = ins->next;                             
1403                                 ins = ins->next;
1404                                 continue;
1405                         }
1406                         /* 
1407                          * Removes the second instruction of a move/move-back pair:
1408                          *
1409                          * OP_MOVE sreg, dreg 
1410                          * OP_MOVE dreg, sreg
1411                          */
1412                         if (last_ins && last_ins->opcode == OP_MOVE &&
1413                             ins->sreg1 == last_ins->dreg &&
1414                             ins->dreg == last_ins->sreg1) {
1415                                 last_ins->next = ins->next;                             
1416                                 ins = ins->next;                                
1417                                 continue;
1418                         }
1419                         break;
1420                 }
1421                 last_ins = ins;
1422                 ins = ins->next;
1423         }
1424         bb->last_ins = last_ins;
1425 }
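     /*
      * Worked example of the store/load forwarding above (register numbers
      * made up for illustration):
      *
      *   OP_STOREI4_MEMBASE_REG R10 -> 16(%rbp)
      *   OP_LOADI4_MEMBASE      16(%rbp) -> R11
      * becomes
      *   OP_STOREI4_MEMBASE_REG R10 -> 16(%rbp)
      *   OP_MOVE                R10 -> R11
      * and when R10 == R11 the load is simply unlinked instead.
      */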
1426
1427 static const int 
1428 branch_cc_table [] = {
1429         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1430         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1431         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1432 };
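     /*
      * branch_cc_table is presumably indexed by (opcode - CEE_BEQ), as in the
      * x86 backend: the first five entries are the signed branches, the next
      * five their unsigned counterparts (signedness itself is passed
      * separately when emitting), and the tail the overflow/carry checks.
      */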
1433
1434 static int
1435 opcode_to_x86_cond (int opcode)
1436 {
1437         switch (opcode) {
1438         case OP_IBEQ:
1439                 return X86_CC_EQ;
1440         case OP_IBNE_UN:
1441                 return X86_CC_NE;
1442         case OP_IBLT:
1443                 return X86_CC_LT;
1444         case OP_IBLT_UN:
1445                 return X86_CC_LT;
1446         case OP_IBGT:
1447                 return X86_CC_GT;
1448         case OP_IBGT_UN:
1449                 return X86_CC_GT;
1450         case OP_IBGE:
1451                 return X86_CC_GE;
1452         case OP_IBGE_UN:
1453                 return X86_CC_GE;
1454         case OP_IBLE:
1455                 return X86_CC_LE;
1456         case OP_IBLE_UN:
1457                 return X86_CC_LE;
1458         case OP_COND_EXC_IOV:
1459                 return X86_CC_O;
1460         case OP_COND_EXC_IC:
1461                 return X86_CC_C;
1462         default:
1463                 g_assert_not_reached ();
1464         }
1465
1466         return -1;
1467 }
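     /*
      * Note that the _UN opcodes map to the same condition codes as their
      * signed counterparts here: the signed/unsigned distinction is expected
      * to be supplied separately (e.g. via the is_signed argument of the
      * x86_branch macros) when the condition is actually emitted.
      */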
1468
1469 /*
1470  * returns the offset used by spillvar. It allocates a new
1471  * spill variable if necessary. 
1472  */
1473 static int
1474 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1475 {
1476         MonoSpillInfo **si, *info;
1477         int i = 0;
1478
1479         si = &cfg->spill_info; 
1480         
1481         while (i <= spillvar) {
1482
1483                 if (!*si) {
1484                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1485                         info->next = NULL;
1486                         cfg->stack_offset += sizeof (gpointer);
1487                         info->offset = - cfg->stack_offset;
1488                 }
1489
1490                 if (i == spillvar)
1491                         return (*si)->offset;
1492
1493                 i++;
1494                 si = &(*si)->next;
1495         }
1496
1497         g_assert_not_reached ();
1498         return 0;
1499 }
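     /*
      * Example: assuming cfg->stack_offset is 0 when the first slot is
      * created, spill var 0 ends up at -8(%rbp), spill var 1 at -16(%rbp)
      * and so on, each slot being sizeof (gpointer) == 8 bytes on AMD64.
      */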
1500
1501 /*
1502  * returns the offset used by spillvar. It allocates a new
1503  * spill float variable if necessary. 
1504  * (same as mono_spillvar_offset but for float)
1505  */
1506 static int
1507 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1508 {
1509         MonoSpillInfo **si, *info;
1510         int i = 0;
1511
1512         si = &cfg->spill_info_float; 
1513         
1514         while (i <= spillvar) {
1515
1516                 if (!*si) {
1517                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1518                         info->next = NULL;
1519                         cfg->stack_offset += sizeof (double);
1520                         info->offset = - cfg->stack_offset;
1521                 }
1522
1523                 if (i == spillvar)
1524                         return (*si)->offset;
1525
1526                 i++;
1527                 si = &(*si)->next;
1528         }
1529
1530         g_assert_not_reached ();
1531         return 0;
1532 }
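     /*
      * The float variant keeps its own cfg->spill_info_float chain, but both
      * chains decrement the same cfg->stack_offset counter, so integer and
      * float spill slots always get distinct offsets.
      */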
1533
1534 /*
1535  * Creates a store for spilled floating point items
1536  */
1537 static MonoInst*
1538 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1539 {
1540         MonoInst *store;
1541         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1542         store->sreg1 = reg;
1543         store->inst_destbasereg = AMD64_RBP;
1544         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1545
1546         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08lx(%%rbp)) (from %d)\n", spill, (long)store->inst_offset, reg));
1547         return store;
1548 }
1549
1550 /*
1551  * Creates a load for spilled floating point items 
1552  */
1553 static MonoInst*
1554 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1555 {
1556         MonoInst *load;
1557         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1558         load->dreg = reg;
1559         load->inst_basereg = AMD64_RBP;
1560         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1561
1562         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08lx(%%rbp)) (from %d)\n", spill, (long)load->inst_offset, reg));
1563         return load;
1564 }
1565
1566 #define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && AMD64_IS_CALLEE_REG ((r)))
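     /*
      * Note: the (r) <= 7 test looks inherited from the x86 version; it keeps
      * R8-R15 (including the callee saved R12-R15) from ever being considered
      * freeable. Flagged here rather than changed.
      */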
1567
1568 typedef struct {
1569         int born_in;
1570         int killed_in;
1571         int last_use;
1572         int prev_use;
1573         int flags;              /* used to track fp spill/load */
1574 } RegTrack;
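     /*
      * Instruction positions are 1-based in the liveness scan below, so
      * born_in == 0 doubles as "register never seen" (print_regtrack
      * depends on this).
      */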
1575
1576 static const char*const * ins_spec = amd64_desc;
1577
1578 static void
1579 print_ins (int i, MonoInst *ins)
1580 {
1581         const char *spec = ins_spec [ins->opcode];
1582         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1583         if (!spec)
1584                 g_error ("Unknown opcode: %s\n", mono_inst_name (ins->opcode));
1585         if (spec [MONO_INST_DEST]) {
1586                 if (ins->dreg >= MONO_MAX_IREGS)
1587                         g_print (" R%d <-", ins->dreg);
1588                 else
1589                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1590         }
1591         if (spec [MONO_INST_SRC1]) {
1592                 if (ins->sreg1 >= MONO_MAX_IREGS)
1593                         g_print (" R%d", ins->sreg1);
1594                 else
1595                         g_print (" %s", mono_arch_regname (ins->sreg1));
1596         }
1597         if (spec [MONO_INST_SRC2]) {
1598                 if (ins->sreg2 >= MONO_MAX_IREGS)
1599                         g_print (" R%d", ins->sreg2);
1600                 else
1601                         g_print (" %s", mono_arch_regname (ins->sreg2));
1602         }
1603         if (spec [MONO_INST_CLOB])
1604                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1605         g_print ("\n");
1606 }
1607
1608 static void
1609 print_regtrack (RegTrack *t, int num)
1610 {
1611         int i;
1612         char buf [32];
1613         const char *r;
1614         
1615         for (i = 0; i < num; ++i) {
1616                 if (!t [i].born_in)
1617                         continue;
1618                 if (i >= MONO_MAX_IREGS) {
1619                         g_snprintf (buf, sizeof(buf), "R%d", i);
1620                         r = buf;
1621                 } else
1622                         r = mono_arch_regname (i);
1623                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1624         }
1625 }
1626
1627 typedef struct InstList InstList;
1628
1629 struct InstList {
1630         InstList *prev;
1631         InstList *next;
1632         MonoInst *data;
1633 };
1634
1635 static inline InstList*
1636 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1637 {
1638         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1639         item->data = data;
1640         item->prev = NULL;
1641         item->next = list;
1642         if (list)
1643                 list->prev = item;
1644         return item;
1645 }
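     /*
      * Usage sketch (this mirrors the loop in mono_arch_local_regalloc
      * below): prepending while walking a basic block forward leaves the
      * list in reverse instruction order, which is what the backwards
      * allocation pass wants.
      *
      *   InstList *reversed = NULL;
      *   MonoInst *cur;
      *   for (cur = bb->code; cur; cur = cur->next)
      *           reversed = inst_list_prepend (cfg->mempool, reversed, cur);
      */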
1646
1647 /*
1648  * Force the spilling of the variable in the symbolic register 'reg'.
1649  */
1650 static int
1651 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1652 {
1653         MonoInst *load;
1654         int i, sel, spill;
1655         
1656         sel = cfg->rs->iassign [reg];
1657         /*i = cfg->rs->isymbolic [sel];
1658         g_assert (i == reg);*/
1659         i = reg;
1660         spill = ++cfg->spill_count;
1661         cfg->rs->iassign [i] = -spill - 1;
1662         mono_regstate_free_int (cfg->rs, sel);
1663         /* we need to create a spill var and insert a load to sel after the current instruction */
1664         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1665         load->dreg = sel;
1666         load->inst_basereg = AMD64_RBP;
1667         load->inst_offset = mono_spillvar_offset (cfg, spill);
1668         if (item->prev) {
1669                 while (ins->next != item->prev->data)
1670                         ins = ins->next;
1671         }
1672         load->next = ins->next;
1673         ins->next = load;
1674         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08lx(%%rbp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
1675         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1676         g_assert (i == sel);
1677
1678         return sel;
1679 }
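     /*
      * Effect sketch (register numbers made up): if symbolic reg R20
      * currently lives in %rbx, this marks R20 as spilled (iassign becomes
      * -spill - 1, decoded later as spill = -val - 1), frees %rbx and
      * inserts an OP_LOAD_MEMBASE from the spill slot right after the
      * current ins, so the already-processed (later-executing) instructions
      * still find R20's value in %rbx.
      */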
1680
1681 static int
1682 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1683 {
1684         MonoInst *load;
1685         int i, sel, spill;
1686
1687         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1688         /* exclude the registers in the current instruction */
1689         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1690                 if (ins->sreg1 >= MONO_MAX_IREGS)
1691                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1692                 else
1693                         regmask &= ~ (1 << ins->sreg1);
1694                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1695         }
1696         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1697                 if (ins->sreg2 >= MONO_MAX_IREGS)
1698                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1699                 else
1700                         regmask &= ~ (1 << ins->sreg2);
1701                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1702         }
1703         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1704                 regmask &= ~ (1 << ins->dreg);
1705                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1706         }
1707
1708         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1709         g_assert (regmask); /* need at least a register we can free */
1710         sel = -1;
1711         /* FIXME: we should track prev_use and spill the register whose next use is farthest away */
1712         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1713                 if (regmask & (1 << i)) {
1714                         sel = i;
1715                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1716                         break;
1717                 }
1718         }
1719         i = cfg->rs->isymbolic [sel];
1720         spill = ++cfg->spill_count;
1721         cfg->rs->iassign [i] = -spill - 1;
1722         mono_regstate_free_int (cfg->rs, sel);
1723         /* we need to create a spill var and insert a load to sel after the current instruction */
1724         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1725         load->dreg = sel;
1726         load->inst_basereg = AMD64_RBP;
1727         load->inst_offset = mono_spillvar_offset (cfg, spill);
1728         if (item->prev) {
1729                 while (ins->next != item->prev->data)
1730                         ins = ins->next;
1731         }
1732         load->next = ins->next;
1733         ins->next = load;
1734         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08lx(%%rbp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
1735         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1736         g_assert (i == sel);
1737         
1738         return sel;
1739 }
1740
1741 static MonoInst*
1742 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1743 {
1744         MonoInst *copy;
1745         MONO_INST_NEW (cfg, copy, OP_MOVE);
1746         copy->dreg = dest;
1747         copy->sreg1 = src;
1748         if (ins) {
1749                 copy->next = ins->next;
1750                 ins->next = copy;
1751         }
1752         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1753         return copy;
1754 }
1755
1756 static MonoInst*
1757 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1758 {
1759         MonoInst *store;
1760         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1761         store->sreg1 = reg;
1762         store->inst_destbasereg = AMD64_RBP;
1763         store->inst_offset = mono_spillvar_offset (cfg, spill);
1764         if (ins) {
1765                 store->next = ins->next;
1766                 ins->next = store;
1767         }
1768         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08lx(%%rbp)) R%d (from %s)\n", spill, (long)store->inst_offset, prev_reg, mono_arch_regname (reg)));
1769         return store;
1770 }
1771
1772 static void
1773 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1774 {
1775         MonoInst *prev;
1776         if (item->next) {
1777                 prev = item->next->data;
1778
1779                 while (prev->next != ins)
1780                         prev = prev->next;
1781                 to_insert->next = ins;
1782                 prev->next = to_insert;
1783         } else {
1784                 to_insert->next = ins;
1785         }
1786         /* 
1787          * Needed: otherwise, while processing the next instruction, a new
1788          * ins could be appended after 'ins' and end up past this one.
1789          */
1790         item->data = to_insert; 
1791 }
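     /*
      * Illustration: fixups locate their insertion point by walking next
      * pointers until they hit the data of a neighbouring InstList item.
      * Pointing item->data at the newest insertion makes those walks stop
      * before it, instead of appending past this instruction.
      */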
1792
1793
1794 #if 0
1795 static int
1796 alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
1797 {
1798         int val = cfg->rs->iassign [sym_reg];
1799         if (val < 0) {
1800                 int spill = 0;
1801                 if (val < -1) {
1802                         /* the register gets spilled after this inst */
1803                         spill = -val -1;
1804                 }
1805                 val = mono_regstate_alloc_int (cfg->rs, allow_mask);
1806                 if (val < 0)
1807                         val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
1808                 cfg->rs->iassign [sym_reg] = val;
1809                 /* add option to store before the instruction for src registers */
1810                 if (spill)
1811                         create_spilled_store (cfg, spill, val, sym_reg, ins);
1812         }
1813         cfg->rs->isymbolic [val] = sym_reg;
1814         return val;
1815 }
1816 #endif
1817
1818 /* flags used in reginfo->flags */
1819 enum {
1820         MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
1821         MONO_X86_FP_NEEDS_SPILL         = 1 << 1,
1822         MONO_X86_FP_NEEDS_LOAD          = 1 << 2,
1823         MONO_X86_REG_NOT_ECX            = 1 << 3,
1824         MONO_X86_REG_EAX                = 1 << 4,
1825         MONO_X86_REG_EDX                = 1 << 5,
1826         MONO_X86_REG_ECX                = 1 << 6
1827 };
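     /*
      * The MONO_X86_* names (and the EAX/EDX/ECX wording in the debug
      * messages below) are inherited from the x86 backend this file is based
      * on; on AMD64 the flags steer allocation towards RAX/RDX/RCX, as
      * mono_amd64_alloc_int_reg shows.
      */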
1828
1829 static int
1830 mono_amd64_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1831 {
1832         int val;
1833         int test_mask = dest_mask;
1834
1835         if (flags & MONO_X86_REG_EAX)
1836                 test_mask &= (1 << AMD64_RAX);
1837         else if (flags & MONO_X86_REG_EDX)
1838                 test_mask &= (1 << AMD64_RDX);
1839         else if (flags & MONO_X86_REG_ECX)
1840                 test_mask &= (1 << AMD64_RCX);
1841         else if (flags & MONO_X86_REG_NOT_ECX)
1842                 test_mask &= ~ (1 << AMD64_RCX);
1843
1844         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1845         if (val >= 0 && test_mask != dest_mask)
1846                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1847
1848         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1849                 DEBUG(g_print ("\tFailed to allocate from flag suggested mask (%u), retrying while excluding ECX\n", test_mask));
1850                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & ~(1 << AMD64_RCX)));
1851         }
1852
1853         if (val < 0) {
1854                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1855                 if (val < 0)
1856                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1857         }
1858
1859         return val;
1860 }
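     /*
      * Behaviour sketch: the flags first narrow dest_mask (for instance
      * MONO_X86_REG_NOT_ECX clears the RCX bit); if the narrowed mask cannot
      * be satisfied, the allocation is retried with progressively weaker
      * constraints, and as a last resort another register is freed via
      * get_register_spilling.
      */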
1861
1862
1863 /*#include "cprop.c"*/
1864
1865 /*
1866  * Local register allocation.
1867  * We first scan the list of instructions and save the liveness info of
1868  * each register (when the register is first used, when its value is set, etc.).
1869  * We also reverse the list of instructions (in the InstList list) because assigning
1870  * registers backwards allows for more tricks to be used.
1871  */
1872 void
1873 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1874 {
1875         MonoInst *ins;
1876         MonoRegState *rs = cfg->rs;
1877         int i, val, fpcount;
1878         RegTrack *reginfo, *reginfof;
1879         RegTrack *reginfo1, *reginfo2, *reginfod;
1880         InstList *tmp, *reversed = NULL;
1881         const char *spec;
1882         guint32 src1_mask, src2_mask, dest_mask;
1883         GList *fspill_list = NULL;
1884         int fspill = 0;
1885
1886         if (!bb->code)
1887                 return;
1888         rs->next_vireg = bb->max_ireg;
1889         rs->next_vfreg = bb->max_freg;
1890         mono_regstate_assign (rs);
1891         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1892         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1893         rs->ifree_mask = AMD64_CALLEE_REGS;
1894
1895         ins = bb->code;
1896
1897         /*if (cfg->opt & MONO_OPT_COPYPROP)
1898                 local_copy_prop (cfg, ins);*/
1899
1900         i = 1;
1901         fpcount = 0;
1902         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1903         /* forward pass on the instructions to collect register liveness info */
1904         while (ins) {
1905                 spec = ins_spec [ins->opcode];
1906                 
1907                 DEBUG (print_ins (i, ins));
1908
1909                 if (spec [MONO_INST_SRC1]) {
1910                         if (spec [MONO_INST_SRC1] == 'f') {
1911                                 GList *spill;
1912                                 reginfo1 = reginfof;
1913
1914                                 spill = g_list_first (fspill_list);
1915                                 if (spill && fpcount < MONO_MAX_FREGS) {
1916                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1917                                         fspill_list = g_list_remove (fspill_list, spill->data);
1918                                 } else
1919                                         fpcount--;
1920                         }
1921                         else
1922                                 reginfo1 = reginfo;
1923                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1924                         reginfo1 [ins->sreg1].last_use = i;
1925                         if (spec [MONO_INST_SRC1] == 'L') {
1926                                 /* The virtual register is allocated sequentially */
1927                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1928                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1929                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1930                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1931
1932                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1933                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1934                         }
1935                 } else {
1936                         ins->sreg1 = -1;
1937                 }
1938                 if (spec [MONO_INST_SRC2]) {
1939                         if (spec [MONO_INST_SRC2] == 'f') {
1940                                 GList *spill;
1941                                 reginfo2 = reginfof;
1942                                 spill = g_list_first (fspill_list);
1943                                 if (spill) {
1944                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1945                                         fspill_list = g_list_remove (fspill_list, spill->data);
1946                                         if (fpcount >= MONO_MAX_FREGS) {
1947                                                 fspill++;
1948                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1949                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1950                                         }
1951                                 } else
1952                                         fpcount--;
1953                         }
1954                         else
1955                                 reginfo2 = reginfo;
1956                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1957                         reginfo2 [ins->sreg2].last_use = i;
1958                         if (spec [MONO_INST_SRC2] == 'L') {
1959                                 /* The virtual register is allocated sequentially */
1960                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1961                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1962                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1963                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1964                         }
1965                         if (spec [MONO_INST_CLOB] == 's') {
1966                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1967                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1968                         }
1969                 } else {
1970                         ins->sreg2 = -1;
1971                 }
1972                 if (spec [MONO_INST_DEST]) {
1973                         if (spec [MONO_INST_DEST] == 'f') {
1974                                 reginfod = reginfof;
1975                                 if (fpcount >= MONO_MAX_FREGS) {
1976                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1977                                         fspill++;
1978                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1979                                         fpcount--;
1980                                 }
1981                                 fpcount++;
1982                         }
1983                         else
1984                                 reginfod = reginfo;
1985                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1986                                 reginfod [ins->dreg].killed_in = i;
1987                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1988                         reginfod [ins->dreg].last_use = i;
1989                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1990                                 reginfod [ins->dreg].born_in = i;
1991                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1992                                 /* The virtual register is allocated sequentially */
1993                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1994                                 reginfod [ins->dreg + 1].last_use = i;
1995                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1996                                         reginfod [ins->dreg + 1].born_in = i;
1997
1998                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1999                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
2000                         }
2001                 } else {
2002                         ins->dreg = -1;
2003                 }
2004
2005                 if (spec [MONO_INST_CLOB] == 'c') {
2006                         /* A call instruction implicitly uses all registers in call->out_reg_args */
2007
2008                         MonoCallInst *call = (MonoCallInst*)ins;
2009                         GSList *list;
2010
2011                         list = call->out_reg_args;
2012                         if (list) {
2013                                 while (list) {
2014                                         guint64 regpair;
2015                                         int reg, hreg;
2016
2017                                         regpair = (guint64) (list->data);
2018                                         hreg = regpair >> 32;
2019                                         reg = regpair & 0xffffffff;
2020
2021                                         reginfo [reg].prev_use = reginfo [reg].last_use;
2022                                         reginfo [reg].last_use = i;
2023
2024                                         list = g_slist_next (list);
2025                                 }
2026                         }
2027                 }
2028
2029                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
2030                 ++i;
2031                 ins = ins->next;
2032         }
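         /*
          * 'reversed' now lists the instructions back to front; the backward
          * pass below reuses 'i' as the 1-based position of each ins,
          * matching the numbering used in the forward scan.
          */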
2033
2034         // todo: check if we have anything left on fp stack, in verify mode?
2035         fspill = 0;
2036
2037         DEBUG (print_regtrack (reginfo, rs->next_vireg));
2038         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2039         tmp = reversed;
2040         while (tmp) {
2041                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2042                 dest_mask = src1_mask = src2_mask = AMD64_CALLEE_REGS;
2043                 --i;
2044                 ins = tmp->data;
2045                 spec = ins_spec [ins->opcode];
2046                 prev_dreg = -1;
2047                 clob_dreg = -1;
2048                 DEBUG (g_print ("processing:"));
2049                 DEBUG (print_ins (i, ins));
2050                 if (spec [MONO_INST_CLOB] == 's') {
2051                         if (rs->ifree_mask & (1 << AMD64_RCX)) {
2052                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2053                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2054                                         /* Argument already in hard reg, need to copy */
2055                                         MonoInst *copy = create_copy_ins (cfg, AMD64_RCX, ins->sreg2, NULL);
2056                                         insert_before_ins (ins, tmp, copy);
2057                                 }
2058                                 rs->iassign [ins->sreg2] = AMD64_RCX;
2059                                 rs->isymbolic [AMD64_RCX] = ins->sreg2;
2060                                 ins->sreg2 = AMD64_RCX;
2061                                 rs->ifree_mask &= ~ (1 << AMD64_RCX);
2062                         } else {
2063                                 int need_ecx_spill = TRUE;
2064                                 /* 
2065                                  * we first check if src1/dreg is already assigned a register
2066                                  * and then we force a spill of the var assigned to ECX.
2067                                  */
2068                                 /* the destination register can't be ECX */
2069                                 dest_mask &= ~ (1 << AMD64_RCX);
2070                                 src1_mask &= ~ (1 << AMD64_RCX);
2071                                 val = rs->iassign [ins->dreg];
2072                                 /* 
2073                                  * the destination register is already assigned to ECX:
2074                                  * we need to allocate another register for it and then
2075                                  * copy from this to ECX.
2076                                  */
2077                                 if (val == AMD64_RCX && ins->dreg != ins->sreg2) {
2078                                         int new_dest;
2079                                         new_dest = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2080                                         g_assert (new_dest >= 0);
2081                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2082
2083                                         rs->isymbolic [new_dest] = ins->dreg;
2084                                         rs->iassign [ins->dreg] = new_dest;
2085                                         clob_dreg = ins->dreg;
2086                                         ins->dreg = new_dest;
2087                                         create_copy_ins (cfg, AMD64_RCX, new_dest, ins);
2088                                         need_ecx_spill = FALSE;
2089                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2090                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2091                                         rs->iassign [ins->dreg] = val;
2092                                         rs->isymbolic [val] = prev_dreg;
2093                                         ins->dreg = val;*/
2094                                 }
2095                                 val = rs->iassign [ins->sreg2];
2096                                 if (val >= 0 && val != AMD64_RCX) {
2097                                         MonoInst *move = create_copy_ins (cfg, AMD64_RCX, val, NULL);
2098                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2099                                         move->next = ins;
2100                                         g_assert_not_reached ();
2101                                         /* FIXME: where is move connected to the instruction list? */
2102                                         //tmp->prev->data->next = move;
2103                                 }
2104                                 else 
2105                                         if (val == AMD64_RCX) {
2106                                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2107                                                         /* sreg2 is already assigned to a hard reg, need to copy */
2108                                                         MonoInst *copy = create_copy_ins (cfg, AMD64_RCX, ins->sreg2, NULL);
2109                                                         insert_before_ins (ins, tmp, copy);
2110                                                 }
2111                                                 need_ecx_spill = FALSE;
2112                                         }
2113                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << AMD64_RCX))) {
2114                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RCX]));
2115                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RCX]);
2116                                         mono_regstate_free_int (rs, AMD64_RCX);
2117                                 }
2118                                 /* force-set sreg2 */
2119                                 rs->iassign [ins->sreg2] = AMD64_RCX;
2120                                 rs->isymbolic [AMD64_RCX] = ins->sreg2;
2121                                 ins->sreg2 = AMD64_RCX;
2122                                 rs->ifree_mask &= ~ (1 << AMD64_RCX);
2123                         }
2124                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
2125                         int dest_reg = AMD64_RAX;
2126                         int clob_reg = AMD64_RDX;
2127                         if (spec [MONO_INST_DEST] == 'd') {
2128                                 dest_reg = AMD64_RDX; /* remainder */
2129                                 clob_reg = AMD64_RAX;
2130                         }
2131                         val = rs->iassign [ins->dreg];
2132                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2133                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2134                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2135                                 mono_regstate_free_int (rs, dest_reg);
2136                         }
2137                         if (val < 0) {
2138                                 if (val < -1) {
2139                                         /* the register gets spilled after this inst */
2140                                         int spill = -val -1;
2141                                         dest_mask = 1 << clob_reg;
2142                                         prev_dreg = ins->dreg;
2143                                         val = mono_regstate_alloc_int (rs, dest_mask);
2144                                         if (val < 0)
2145                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2146                                         rs->iassign [ins->dreg] = val;
2147                                         if (spill)
2148                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2149                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2150                                         rs->isymbolic [val] = prev_dreg;
2151                                         ins->dreg = val;
2152                                         if (val != dest_reg) { /* force a copy */
2153                                                 create_copy_ins (cfg, val, dest_reg, ins);
2154                                         }
2155                                 } else {
2156                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2157                                         prev_dreg = ins->dreg;
2158                                         rs->iassign [ins->dreg] = dest_reg;
2159                                         rs->isymbolic [dest_reg] = ins->dreg;
2160                                         ins->dreg = dest_reg;
2161                                         rs->ifree_mask &= ~ (1 << dest_reg);
2162                                 }
2163                         } else {
2164                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2165                                 if (val != dest_reg) { /* force a copy */
2166                                         create_copy_ins (cfg, val, dest_reg, ins);
2167                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2168                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2169                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2170                                                 mono_regstate_free_int (rs, dest_reg);
2171                                         }
2172                                 }
2173                         }
2174                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= MONO_MAX_IREGS)) {
2175                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2176                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2177                                 mono_regstate_free_int (rs, clob_reg);
2178                         }
2179                         src1_mask = 1 << AMD64_RAX;
2180                         src2_mask = 1 << AMD64_RCX;
2181                 }
2182                 if (spec [MONO_INST_DEST] == 'l') {
2183                         int hreg;
2184                         val = rs->iassign [ins->dreg];
2185                         /* check the special case where dreg has been moved from ecx (clob shift) */
2186                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2187                                 hreg = clob_dreg + 1;
2188                         else
2189                                 hreg = ins->dreg + 1;
2190
2191                         /* base prev_dreg on fixed hreg, handle clob case */
2192                         val = hreg - 1;
2193
2194                         if (val != rs->isymbolic [AMD64_RAX] && !(rs->ifree_mask & (1 << AMD64_RAX))) {
2195                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
2196                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
2197                                 mono_regstate_free_int (rs, AMD64_RAX);
2198                         }
2199                         if (hreg != rs->isymbolic [AMD64_RDX] && !(rs->ifree_mask & (1 << AMD64_RDX))) {
2200                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [AMD64_RDX]));
2201                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RDX]);
2202                                 mono_regstate_free_int (rs, AMD64_RDX);
2203                         }
2204                 }
2205
2206                 /* Track dreg */
2207                 if (spec [MONO_INST_DEST] == 'f') {
2208                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2209                                 GList *spill_node;
2210                                 MonoInst *store;
2211                                 spill_node = g_list_first (fspill_list);
2212                                 g_assert (spill_node);
2213
2214                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2215                                 insert_before_ins (ins, tmp, store);
2216                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2217                                 fspill--;
2218                         }
2219                 } else if (spec [MONO_INST_DEST] == 'L') {
2220                         int hreg;
2221                         val = rs->iassign [ins->dreg];
2222                         /* check the special case where dreg has been moved from ecx (clob shift) */
2223                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2224                                 hreg = clob_dreg + 1;
2225                         else
2226                                 hreg = ins->dreg + 1;
2227
2228                         /* base prev_dreg on fixed hreg, handle clob case */
2229                         prev_dreg = hreg - 1;
2230
2231                         if (val < 0) {
2232                                 int spill = 0;
2233                                 if (val < -1) {
2234                                         /* the register gets spilled after this inst */
2235                                         spill = -val -1;
2236                                 }
2237                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2238                                 rs->iassign [ins->dreg] = val;
2239                                 if (spill)
2240                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2241                         }
2242
2243                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2244  
2245                         rs->isymbolic [val] = hreg - 1;
2246                         ins->dreg = val;
2247                         
2248                         val = rs->iassign [hreg];
2249                         if (val < 0) {
2250                                 int spill = 0;
2251                                 if (val < -1) {
2252                                         /* the register gets spilled after this inst */
2253                                         spill = -val -1;
2254                                 }
2255                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2256                                 rs->iassign [hreg] = val;
2257                                 if (spill)
2258                                         create_spilled_store (cfg, spill, val, hreg, ins);
2259                         }
2260
2261                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2262                         rs->isymbolic [val] = hreg;
2263                         /* save reg allocating into unused */
2264                         ins->unused = val;
2265
2266                         /* check if we can free our long reg */
2267                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2268                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2269                                 mono_regstate_free_int (rs, val);
2270                         }
2271                 }
2272                 else if (ins->dreg >= MONO_MAX_IREGS) {
2273                         int hreg;
2274                         val = rs->iassign [ins->dreg];
2275                         if (spec [MONO_INST_DEST] == 'l') {
2276                                 /* check the special case where dreg has been moved from ecx (clob shift) */
2277                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2278                                         hreg = clob_dreg + 1;
2279                                 else
2280                                         hreg = ins->dreg + 1;
2281
2282                                 /* base prev_dreg on fixed hreg, handle clob case */
2283                                 prev_dreg = hreg - 1;
2284                         } else
2285                                 prev_dreg = ins->dreg;
2286
2287                         if (val < 0) {
2288                                 int spill = 0;
2289                                 if (val < -1) {
2290                                         /* the register gets spilled after this inst */
2291                                         spill = -val -1;
2292                                 }
2293                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2294                                 rs->iassign [ins->dreg] = val;
2295                                 if (spill)
2296                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2297                         }
2298                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2299                         rs->isymbolic [val] = prev_dreg;
2300                         ins->dreg = val;
2301                         /* handle cases where lreg needs to be eax:edx */
2302                         if (spec [MONO_INST_DEST] == 'l') {
2303                                 /* check the special case where dreg has been moved from ecx (clob shift) */
2304                                 int hreg = prev_dreg + 1;
2305                                 val = rs->iassign [hreg];
2306                                 if (val < 0) {
2307                                         int spill = 0;
2308                                         if (val < -1) {
2309                                                 /* the register gets spilled after this inst */
2310                                                 spill = -val -1;
2311                                         }
2312                                         val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2313                                         rs->iassign [hreg] = val;
2314                                         if (spill)
2315                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2316                                 }
2317                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2318                                 rs->isymbolic [val] = hreg;
2319                                 if (ins->dreg == AMD64_RAX) {
2320                                         if (val != AMD64_RDX)
2321                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2322                                 } else if (ins->dreg == AMD64_RDX) {
2323                                         if (val == AMD64_RAX) {
2324                                                 /* swap */
2325                                                 g_assert_not_reached ();
2326                                         } else {
2327                                                 /* two forced copies */
2328                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2329                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2330                                         }
2331                                 } else {
2332                                         if (val == AMD64_RDX) {
2333                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2334                                         } else {
2335                                                 /* two forced copies */
2336                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2337                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2338                                         }
2339                                 }
2340                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2341                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2342                                         mono_regstate_free_int (rs, val);
2343                                 }
2344                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != AMD64_RAX && spec [MONO_INST_CLOB] != 'd') {
2345                                 /* this instruction only outputs to EAX, need to copy */
2346                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2347                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != AMD64_RDX && spec [MONO_INST_CLOB] != 'd') {
2348                                 create_copy_ins (cfg, ins->dreg, AMD64_RDX, ins);
2349                         }
2350                 }
2351                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2352                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2353                         mono_regstate_free_int (rs, ins->dreg);
2354                 }
2355                 /* put src1 in EAX if it needs to be */
2356                 if (spec [MONO_INST_SRC1] == 'a') {
2357                         if (!(rs->ifree_mask & (1 << AMD64_RAX))) {
2358                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
2359                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
2360                                 mono_regstate_free_int (rs, AMD64_RAX);
2361                         }
2362                         if (ins->sreg1 < MONO_MAX_IREGS) {
2363                                 /* The argument is already in a hard reg, need to copy */
2364                                 MonoInst *copy = create_copy_ins (cfg, AMD64_RAX, ins->sreg1, NULL);
2365                                 insert_before_ins (ins, tmp, copy);
2366                         }
2367                         /* force-set sreg1 */
2368                         rs->iassign [ins->sreg1] = AMD64_RAX;
2369                         rs->isymbolic [AMD64_RAX] = ins->sreg1;
2370                         ins->sreg1 = AMD64_RAX;
2371                         rs->ifree_mask &= ~ (1 << AMD64_RAX);
2372                 }
2373
2374                 /* Track sreg1 */
2375                 if (spec [MONO_INST_SRC1] == 'f') {
2376                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2377                                 MonoInst *load;
2378                                 MonoInst *store = NULL;
2379
2380                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2381                                         GList *spill_node;
2382                                         spill_node = g_list_first (fspill_list);
2383                                         g_assert (spill_node);
2384
2385                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2386                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2387                                 }
2388
2389                                 fspill++;
2390                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2391                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2392                                 insert_before_ins (ins, tmp, load);
2393                                 if (store) 
2394                                         insert_before_ins (load, tmp, store);
2395                         }
2396                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2397                         /* force source to be same as dest */
2398                         rs->iassign [ins->sreg1] = ins->dreg;
2399                         rs->iassign [ins->sreg1 + 1] = ins->unused;
2400
2401                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2402                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2403
2404                         ins->sreg1 = ins->dreg;
2405                         /* 
2406                          * No need for saving the reg, we know that src1=dest in this case
2407                          * ins->inst_c0 = ins->unused;
2408                          */
2409
2410                         /* make sure that we remove them from the free mask */
2411                         rs->ifree_mask &= ~ (1 << ins->dreg);
2412                         rs->ifree_mask &= ~ (1 << ins->unused);
2413                 }
2414                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2415                         val = rs->iassign [ins->sreg1];
2416                         prev_sreg1 = ins->sreg1;
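                        /*
                         * iassign encoding: >= 0 is a hard register, -1 means not yet
                         * assigned, and anything below -1 means the vreg must also be
                         * spilled to slot (-val - 1) once a register is chosen.
                         */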
2417                         if (val < 0) {
2418                                 int spill = 0;
2419                                 if (val < -1) {
2420                                         /* the register gets spilled after this inst */
2421                                         spill = -val -1;
2422                                 }
2423                                 if (0 && (ins->opcode == OP_MOVE)) {
2424                                         /* 
2425                                          * small optimization: the dest register is already allocated
2426                                          * but the src one is not: we can simply assign the same register
2427                                          * here and peephole will get rid of the instruction later.
2428                                          * This optimization may interfere with the clobbering handling:
2429                                          * it removes a mov operation that will be added again to handle clobbering.
2430                                          * There are also some other issues that show up with 'make testjit'.
2431                                          */
2432                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2433                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2434                                         //g_assert (val >= 0);
2435                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2436                                 } else {
2437                                         //g_assert (val == -1); /* source cannot be spilled */
2438                                         val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2439                                         rs->iassign [ins->sreg1] = val;
2440                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2441                                 }
2442                                 if (spill) {
2443                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2444                                         insert_before_ins (ins, tmp, store);
2445                                 }
2446                         }
2447                         rs->isymbolic [val] = prev_sreg1;
2448                         ins->sreg1 = val;
2449                 } else {
2450                         prev_sreg1 = -1;
2451                 }
2452                 /* handle clobbering of sreg1 */
2453                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2454                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2455                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2456                         insert_before_ins (ins, tmp, copy);
2457                         /* we set sreg1 to dest as well */
2458                         prev_sreg1 = ins->sreg1 = ins->dreg;
2459                         src2_mask &= ~ (1 << ins->dreg);
2460                 }
2461                 /* track sreg2 */
2462                 if (spec [MONO_INST_SRC2] == 'f') {
2463                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2464                                 MonoInst *load;
2465                                 MonoInst *store = NULL;
2466
2467                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2468                                         GList *spill_node;
2469
2470                                         spill_node = g_list_first (fspill_list);
2471                                         g_assert (spill_node);
2472                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2473                                                 spill_node = g_list_next (spill_node);
2474         
2475                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2476                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2477                                 } 
2478                                 
2479                                 fspill++;
2480                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2481                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2482                                 insert_before_ins (ins, tmp, load);
2483                                 if (store) 
2484                                         insert_before_ins (load, tmp, store);
2485                         }
2486                 } 
2487                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2488                         val = rs->iassign [ins->sreg2];
2489                         prev_sreg2 = ins->sreg2;
2490                         if (val < 0) {
2491                                 int spill = 0;
2492                                 if (val < -1) {
2493                                         /* the register gets spilled after this inst */
2494                                         spill = -val -1;
2495                                 }
2496                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2497                                 rs->iassign [ins->sreg2] = val;
2498                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2499                                 if (spill)
2500                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2501                         }
2502                         rs->isymbolic [val] = prev_sreg2;
2503                         ins->sreg2 = val;
2504                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != AMD64_RCX) {
2505                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but RCX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [AMD64_RCX]));
2506                         }
2507                 } else {
2508                         prev_sreg2 = -1;
2509                 }
2510
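                /*
                 * Call sites: any value still live in a register the call can
                 * clobber is force-spilled, then the vregs carrying outgoing
                 * arguments are pinned to their fixed argument registers.  Each
                 * out_reg_args entry packs the hard reg into the upper 32 bits of a
                 * guint64 and the vreg into the lower 32 bits.
                 */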
2511                 if (spec [MONO_INST_CLOB] == 'c') {
2512                         int j, s;
2513                         MonoCallInst *call = (MonoCallInst*)ins;
2514                         GSList *list;
2515                         guint32 clob_mask = AMD64_CALLEE_REGS;
2516
2517                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2518                                 s = 1 << j;
2519                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2520                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [j]);
2521                                         mono_regstate_free_int (rs, j);
2522                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2523                                 }
2524                         }
2525
2526                         /* 
2527                          * Assign all registers in call->out_reg_args to the proper 
2528                          * argument registers.
2529                          */
2530
2531                         list = call->out_reg_args;
2532                         if (list) {
2533                                 while (list) {
2534                                         guint64 regpair;
2535                                         int reg, hreg;
2536
2537                                         regpair = (guint64) (list->data);
2538                                         hreg = regpair >> 32;
2539                                         reg = regpair & 0xffffffff;
2540
2541                                         rs->iassign [reg] = hreg;
2542                                         rs->isymbolic [hreg] = reg;
2543                                         rs->ifree_mask &= ~ (1 << hreg);
2544
2545                                         list = g_slist_next (list);
2546                                 }
2547                                 g_slist_free (call->out_reg_args);
2548                         }
2549                 }
2550
2551                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2552                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2553                         mono_regstate_free_int (rs, ins->sreg1);
2554                 }
2555                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2556                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2557                         mono_regstate_free_int (rs, ins->sreg2);
2558                 }*/
2559         
2560                 DEBUG (print_ins (i, ins));
2561                 /* this may result from an insert_before call */
2562                 if (!tmp->next)
2563                         bb->code = tmp->data;
2564                 tmp = tmp->next;
2565         }
2566
2567         g_free (reginfo);
2568         g_free (reginfof);
2569         g_list_free (fspill_list);
2570 }
2571
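/*
 * Float-to-int conversion needs x87 truncation semantics: bits 10-11 of the
 * FPU control word select the rounding mode, and or-ing in 0xc00 sets them to
 * 11b (round toward zero).  The sequence below saves the control word,
 * switches to truncation, stores the value with fistp, then restores the
 * original control word.
 */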
2572 static unsigned char*
2573 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2574 {
2575         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
2576         x86_fnstcw_membase(code, AMD64_RSP, 0);
2577         amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2);
2578         amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2579         amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2);
2580         amd64_fldcw_membase (code, AMD64_RSP, 2);
2581         amd64_push_reg (code, AMD64_RAX); /* make room for the fistp result: SP = SP - 8 */
2582         amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8);
2583         amd64_pop_reg (code, dreg);
2584         amd64_fldcw_membase (code, AMD64_RSP, 0);
2585         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2586
2587         if (size == 1)
2588                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
2589         else if (size == 2)
2590                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
2591         return code;
2592 }
2593
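/*
 * Lowers localloc: the allocation itself is a subtraction from %rsp (done a
 * page at a time on Windows so each new page gets touched).  When
 * MONO_INST_INIT is set the fresh block is zeroed with a rep stos, using RAX
 * as the zero source, RCX as the counter and RDI as the destination; each of
 * those registers is saved around the loop unless it already holds dreg/sreg1.
 */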
2594 static unsigned char*
2595 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2596 {
2597         int sreg = tree->sreg1;
2598 #ifdef PLATFORM_WIN32
2599         guint8* br[5];
2600
2601         NOT_IMPLEMENTED;
2602
2603         /*
2604          * Under Windows:
2605          * If the requested stack size is larger than one page,
2606          * perform stack-touch operation
2607          */
2608         /*
2609          * Generate stack probe code.
2610          * Under Windows, it is necessary to allocate one page at a time,
2611          * "touching" the stack after each successful sub-allocation. This is
2612          * because of the way stack growth is implemented - there is a
2613          * guard page before the lowest stack page that is currently committed.
2614          * Stack normally grows sequentially, so the OS traps access to the
2615          * guard page and commits more pages when needed.
2616          */
2617         amd64_test_reg_imm (code, sreg, ~0xFFF);
2618         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2619
2620         br[2] = code; /* loop */
2621         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
2622         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
2623         amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2624         amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2625         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2626         amd64_patch (br[3], br[2]);
2627         amd64_test_reg_reg (code, sreg, sreg);
2628         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2629         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2630
2631         br[1] = code; x86_jump8 (code, 0);
2632
2633         amd64_patch (br[0], code);
2634         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2635         amd64_patch (br[1], code);
2636         amd64_patch (br[4], code);
2637 #else /* PLATFORM_WIN32 */
2638         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
2639 #endif
2640         if (tree->flags & MONO_INST_INIT) {
2641                 int offset = 0;
2642                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
2643                         amd64_push_reg (code, AMD64_RAX);
2644                         offset += 8;
2645                 }
2646                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
2647                         amd64_push_reg (code, AMD64_RCX);
2648                         offset += 8;
2649                 }
2650                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
2651                         amd64_push_reg (code, AMD64_RDI);
2652                         offset += 8;
2653                 }
2654                 
2655                 amd64_shift_reg_imm (code, X86_SHR, sreg, 4);
2656                 if (sreg != AMD64_RCX)
2657                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
2658                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2659                                 
2660                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
2661                 amd64_cld (code);
2662                 amd64_prefix (code, X86_REP_PREFIX);
2663                 amd64_stosl (code);
2664                 
2665                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
2666                         amd64_pop_reg (code, AMD64_RDI);
2667                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
2668                         amd64_pop_reg (code, AMD64_RCX);
2669                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
2670                         amd64_pop_reg (code, AMD64_RAX);
2671         }
2672         return code;
2673 }
2674
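/*
 * Moves a call result from its ABI location to ins->dreg.  Under the SysV
 * AMD64 ABI integer and pointer results come back in RAX, floating point
 * results in XMM0, and small value types in up to two registers as described
 * by cinfo->ret.pair_storage.  FP results are bounced through a stack slot
 * because this backend still keeps floats on the x87 stack.
 */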
2675 static guint8*
2676 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2677 {
2678         CallInfo *cinfo;
2679         guint32 offset, quad;
2680
2681         /* Move return value to the target register */
2682         /* FIXME: do this in the local reg allocator */
2683         switch (ins->opcode) {
2684         case CEE_CALL:
2685         case OP_CALL_REG:
2686         case OP_CALL_MEMBASE:
2687         case OP_LCALL:
2688         case OP_LCALL_REG:
2689         case OP_LCALL_MEMBASE:
2690                 if (ins->dreg != AMD64_RAX)
2691                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, 8);
2692                 break;
2693         case OP_FCALL:
2694         case OP_FCALL_REG:
2695         case OP_FCALL_MEMBASE:
2696                 /* FIXME: optimize this */
2697                 offset = mono_spillvar_offset_float (cfg, 0);
2698                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
2699                         amd64_movss_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2700                         amd64_fld_membase (code, AMD64_RBP, offset, FALSE);
2701                 }
2702                 else {
2703                         amd64_movsd_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2704                         amd64_fld_membase (code, AMD64_RBP, offset, TRUE);
2705                 }
2706                 break;
2707         case OP_VCALL:
2708         case OP_VCALL_REG:
2709         case OP_VCALL_MEMBASE:
2710                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2711                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2712                         /* Pop the destination address from the stack */
2713                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2714                         amd64_pop_reg (code, AMD64_RCX);
2715                         
2716                         for (quad = 0; quad < 2; quad ++) {
2717                                 switch (cinfo->ret.pair_storage [quad]) {
2718                                 case ArgInIReg:
2719                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
2720                                         break;
2721                                 case ArgInFloatSSEReg:
2722                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2723                                         break;
2724                                 case ArgInDoubleSSEReg:
2725                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2726                                         break;
2727                                 case ArgNone:
2728                                         break;
2729                                 default:
2730                                         NOT_IMPLEMENTED;
2731                                 }
2732                         }
2733                 }
                g_free (cinfo);
2734                 break;
2735         }
2736
2737         return code;
2738 }
2739
2740 /*
2741  * emit_load_volatile_arguments:
2742  *
2743  *  Load volatile arguments from the stack to the original input registers.
2744  * Required before a tail call.
2745  */
2746 static guint8*
2747 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2748 {
2749         MonoMethod *method = cfg->method;
2750         MonoMethodSignature *sig;
2751         MonoInst *inst;
2752         CallInfo *cinfo;
2753         guint32 i;
2754
2755         /* FIXME: Generate intermediate code instead */
2756
2757         sig = method->signature;
2758
2759         cinfo = get_call_info (sig, FALSE);
2760         
2761         /* This is the opposite of the code in emit_prolog */
2762
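        /*
         * The prolog stored the incoming register arguments into their stack
         * slots; the tail call target expects them back in the SysV argument
         * registers (RDI, RSI, RDX, RCX, R8, R9 and XMM0-XMM7), so reload each
         * one from the slot recorded in cfg->varinfo.
         */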
2763         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2764                 ArgInfo *ainfo = cinfo->args + i;
2765                 MonoType *arg_type;
2766                 inst = cfg->varinfo [i];
2767
2768                 if (sig->hasthis && (i == 0))
2769                         arg_type = &mono_defaults.object_class->byval_arg;
2770                 else
2771                         arg_type = sig->params [i - sig->hasthis];
2772
2773                 if (inst->opcode != OP_REGVAR) {
2774                         switch (ainfo->storage) {
2775                         case ArgInIReg: {
2776                                 guint32 size = 8;
2777
2778                                 /* FIXME: I1 etc */
2779                                 amd64_mov_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset, size);
2780                                 break;
2781                         }
2782                         case ArgInFloatSSEReg:
2783                                 amd64_movss_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
2784                                 break;
2785                         case ArgInDoubleSSEReg:
2786                                 amd64_movsd_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
2787                                 break;
2788                         default:
2789                                 break;
2790                         }
2791                 }
2792         }
2793
2794         g_free (cinfo);
2795
2796         return code;
2797 }
2798
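/*
 * Debugging helper which calls printf with a register's value.  The
 * push-based argument passing and the 3*4 byte stack cleanup follow 32 bit
 * x86 conventions, so this looks inherited from mini-x86.c and is presumably
 * only meant for ad-hoc use.
 */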
2799 #define REAL_PRINT_REG(text,reg) \
2800 mono_assert (reg >= 0); \
2801 amd64_push_reg (code, AMD64_RAX); \
2802 amd64_push_reg (code, AMD64_RDX); \
2803 amd64_push_reg (code, AMD64_RCX); \
2804 amd64_push_reg (code, reg); \
2805 amd64_push_imm (code, reg); \
2806 amd64_push_imm (code, text " %d %p\n"); \
2807 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
2808 amd64_call_reg (code, AMD64_RAX); \
2809 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*4); \
2810 amd64_pop_reg (code, AMD64_RCX); \
2811 amd64_pop_reg (code, AMD64_RDX); \
2812 amd64_pop_reg (code, AMD64_RAX);
2813
2814 /* benchmark and set based on cpu */
2815 #define LOOP_ALIGNMENT 8
2816 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2817
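/*
 * Emits native code for one basic block: walk the instruction list, look up
 * each opcode's worst-case encoded length in the machine description
 * (ins_spec), grow the code buffer when needed, and translate opcode by
 * opcode in the big switch below.
 */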
2818 void
2819 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2820 {
2821         MonoInst *ins;
2822         MonoCallInst *call;
2823         guint offset;
2824         guint8 *code = cfg->native_code + cfg->code_len;
2825         MonoInst *last_ins = NULL;
2826         guint last_offset = 0;
2827         int max_len, cpos;
2828
2829         if (cfg->opt & MONO_OPT_PEEPHOLE)
2830                 peephole_pass (cfg, bb);
2831
2832         if (cfg->opt & MONO_OPT_LOOP) {
2833                 int pad, align = LOOP_ALIGNMENT;
2834                 /* set alignment depending on cpu */
2835                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2836                         pad = align - pad;
2837                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2838                         amd64_padding (code, pad);
2839                         cfg->code_len += pad;
2840                         bb->native_offset = cfg->code_len;
2841                 }
2842         }
2843
2844         if (cfg->verbose_level > 2)
2845                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2846
2847         cpos = bb->max_offset;
2848
2849         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2850                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2851                 g_assert (!mono_compile_aot);
2852                 cpos += 6;
2853
2854                 cov->data [bb->dfn].cil_code = bb->cil_code;
2855                 /* this is not thread safe, but good enough */
2856                 amd64_inc_mem (code, (guint64)&cov->data [bb->dfn].count); 
2857         }
2858
2859         offset = code - cfg->native_code;
2860
2861         ins = bb->code;
2862         while (ins) {
2863                 offset = code - cfg->native_code;
2864
2865                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2866
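                /*
                 * Grow the buffer whenever the worst-case encoding of this ins
                 * (plus 16 bytes of slack) might not fit.
                 */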
2867                 if (offset > (cfg->code_size - max_len - 16)) {
2868                         cfg->code_size *= 2;
2869                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2870                         code = cfg->native_code + offset;
2871                         mono_jit_stats.code_reallocs++;
2872                 }
2873
2874                 mono_debug_record_line_number (cfg, ins, offset);
2875
2876                 switch (ins->opcode) {
2877                 case OP_BIGMUL:
2878                         amd64_mul_reg (code, ins->sreg2, TRUE);
2879                         break;
2880                 case OP_BIGMUL_UN:
2881                         amd64_mul_reg (code, ins->sreg2, FALSE);
2882                         break;
2883                 case OP_X86_SETEQ_MEMBASE:
2884                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2885                         break;
2886                 case OP_STOREI1_MEMBASE_IMM:
2887                         g_assert (amd64_is_imm32 (ins->inst_imm));
2888                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2889                         break;
2890                 case OP_STOREI2_MEMBASE_IMM:
2891                         g_assert (amd64_is_imm32 (ins->inst_imm));
2892                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2893                         break;
2894                 case OP_STOREI4_MEMBASE_IMM:
2895                         g_assert (amd64_is_imm32 (ins->inst_imm));
2896                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2897                         break;
2898                 case OP_STOREI1_MEMBASE_REG:
2899                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2900                         break;
2901                 case OP_STOREI2_MEMBASE_REG:
2902                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2903                         break;
2904                 case OP_STORE_MEMBASE_REG:
2905                 case OP_STOREI8_MEMBASE_REG:
2906                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2907                         break;
2908                 case OP_STOREI4_MEMBASE_REG:
2909                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2910                         break;
2911                 case OP_STORE_MEMBASE_IMM:
2912                 case OP_STOREI8_MEMBASE_IMM:
2913                         if (amd64_is_imm32 (ins->inst_imm))
2914                                 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2915                         else {
2916                                 amd64_mov_reg_imm (code, GP_SCRATCH_REG, ins->inst_imm);
2917                                 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, GP_SCRATCH_REG, 8);
2918                         }
2919                         break;
2920                 case CEE_LDIND_I:
2921                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, sizeof (gpointer));
2922                         break;
2923                 case CEE_LDIND_I4:
2924                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2925                         break;
2926                 case CEE_LDIND_U4:
2927                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2928                         break;
2929                 case OP_LOADU4_MEM:
2930                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2931                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2932                         break;
2933                 case OP_LOAD_MEMBASE:
2934                 case OP_LOADI8_MEMBASE:
2935                         if (amd64_is_imm32 (ins->inst_offset)) {
2936                                 amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2937                         }
2938                         else {
2939                                 amd64_mov_reg_imm_size (code, GP_SCRATCH_REG, ins->inst_offset, 8);
2940                                 amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, GP_SCRATCH_REG, 0, 8);
2941                         }
2942                         break;
2943                 case OP_LOADI4_MEMBASE:
2944                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2945                         break;
2946                 case OP_LOADU4_MEMBASE:
2947                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2948                         break;
2949                 case OP_LOADU1_MEMBASE:
2950                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2951                         break;
2952                 case OP_LOADI1_MEMBASE:
2953                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2954                         break;
2955                 case OP_LOADU2_MEMBASE:
2956                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2957                         break;
2958                 case OP_LOADI2_MEMBASE:
2959                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2960                         break;
2961                 case CEE_CONV_I1:
2962                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2963                         break;
2964                 case CEE_CONV_I2:
2965                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2966                         break;
2967                 case CEE_CONV_U1:
2968                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2969                         break;
2970                 case CEE_CONV_U2:
2971                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2972                         break;
2973                 case CEE_CONV_U8:
2974                 case CEE_CONV_U:
2975                         /* Zero-extend: a 32 bit mov clears the upper 32 bits */
2976                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
2977                         break;
2978                 case CEE_CONV_I8:
2979                 case CEE_CONV_I:
2980                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
2981                         break;                  
2982                 case OP_COMPARE:
2983                 case OP_LCOMPARE:
2984                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2985                         break;
2986                 case OP_COMPARE_IMM:
2987                         if (!amd64_is_imm32 (ins->inst_imm)) {
2988                                 amd64_mov_reg_imm (code, AMD64_R11, ins->inst_imm);
2989                                 amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, AMD64_R11);
2990                         } else {
2991                                 amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2992                         }
2993                         break;
2994                 case OP_X86_COMPARE_MEMBASE_REG:
2995                         amd64_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2996                         break;
2997                 case OP_X86_COMPARE_MEMBASE_IMM:
2998                         g_assert (amd64_is_imm32 (ins->inst_imm));
2999                         amd64_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
3000                         break;
3001                 case OP_X86_COMPARE_REG_MEMBASE:
3002                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
3003                         break;
3004                 case OP_X86_TEST_NULL:
3005                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3006                         break;
3007                 case OP_X86_ADD_MEMBASE_IMM:
3008                         /* FIXME: Make a 64 bit version too */
3009                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3010                         break;
3011                 case OP_X86_ADD_MEMBASE:
3012                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3013                         break;
3014                 case OP_X86_SUB_MEMBASE_IMM:
3015                         g_assert (amd64_is_imm32 (ins->inst_imm));
3016                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3017                         break;
3018                 case OP_X86_SUB_MEMBASE:
3019                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3020                         break;
3021                 case OP_X86_INC_MEMBASE:
3022                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3023                         break;
3024                 case OP_X86_INC_REG:
3025                         amd64_inc_reg_size (code, ins->dreg, 4);
3026                         break;
3027                 case OP_X86_DEC_MEMBASE:
3028                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3029                         break;
3030                 case OP_X86_DEC_REG:
3031                         amd64_dec_reg_size (code, ins->dreg, 4);
3032                         break;
3033                 case OP_X86_MUL_MEMBASE:
3034                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3035                         break;
3036                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
3037                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3038                         break;
3039                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
3040                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3041                         break;
3042                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
3043                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3044                         break;
3045                 case CEE_BREAK:
3046                         amd64_breakpoint (code);
3047                         break;
3048
3049                 case OP_ADDCC:
3050                 case CEE_ADD:
3051                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
3052                         break;
3053                 case OP_ADC:
3054                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
3055                         break;
3056                 case OP_ADD_IMM:
3057                         g_assert (amd64_is_imm32 (ins->inst_imm));
3058                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
3059                         break;
3060                 case OP_ADC_IMM:
3061                         g_assert (amd64_is_imm32 (ins->inst_imm));
3062                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
3063                         break;
3064                 case OP_SUBCC:
3065                 case CEE_SUB:
3066                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
3067                         break;
3068                 case OP_SBB:
3069                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
3070                         break;
3071                 case OP_SUB_IMM:
3072                         g_assert (amd64_is_imm32 (ins->inst_imm));
3073                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
3074                         break;
3075                 case OP_SBB_IMM:
3076                         g_assert (amd64_is_imm32 (ins->inst_imm));
3077                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
3078                         break;
3079                 case CEE_AND:
3080                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
3081                         break;
3082                 case OP_AND_IMM:
3083                         g_assert (amd64_is_imm32 (ins->inst_imm));
3084                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
3085                         break;
3086                 case CEE_MUL:
3087                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3088                         break;
3089                 case OP_MUL_IMM:
3090                         amd64_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3091                         break;
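                /*
                 * Division uses the implicit RDX:RAX pair: cdq sign-extends RAX
                 * into RDX (the unsigned forms zero RDX instead), the quotient
                 * ends up in RAX and the remainder in RDX.
                 */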
3092                 case CEE_DIV:
3093                         amd64_cdq (code);
3094                         amd64_div_reg (code, ins->sreg2, TRUE);
3095                         break;
3096                 case CEE_DIV_UN:
3097                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3098                         amd64_div_reg (code, ins->sreg2, FALSE);
3099                         break;
3100                 case OP_DIV_IMM:
3101                         g_assert (amd64_is_imm32 (ins->inst_imm));
3102                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3103                         amd64_cdq (code);
3104                         amd64_div_reg (code, ins->sreg2, TRUE);
3105                         break;
3106                 case CEE_REM:
3107                         amd64_cdq (code);
3108                         amd64_div_reg (code, ins->sreg2, TRUE);
3109                         break;
3110                 case CEE_REM_UN:
3111                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3112                         amd64_div_reg (code, ins->sreg2, FALSE);
3113                         break;
3114                 case OP_REM_IMM:
3115                         g_assert (amd64_is_imm32 (ins->inst_imm));
3116                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3117                         amd64_cdq (code);
3118                         amd64_div_reg (code, ins->sreg2, TRUE);
3119                         break;
3120                 case CEE_OR:
3121                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3122                         break;
3123                 case OP_OR_IMM:
3124                         g_assert (amd64_is_imm32 (ins->inst_imm));
3125                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3126                         break;
3127                 case CEE_XOR:
3128                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3129                         break;
3130                 case OP_XOR_IMM:
3131                         g_assert (amd64_is_imm32 (ins->inst_imm));
3132                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3133                         break;
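                /*
                 * Variable shift counts must live in CL on x86-64, hence the
                 * asserts below that sreg2 was allocated to RCX.
                 */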
3134                 case CEE_SHL:
3135                 case OP_LSHL:
3136                         g_assert (ins->sreg2 == AMD64_RCX);
3137                         amd64_shift_reg (code, X86_SHL, ins->dreg);
3138                         break;
3139                 case CEE_SHR:
3140                 case OP_LSHR:
3141                         g_assert (ins->sreg2 == AMD64_RCX);
3142                         amd64_shift_reg (code, X86_SAR, ins->dreg);
3143                         break;
3144                 case OP_SHR_IMM:
3145                         g_assert (amd64_is_imm32 (ins->inst_imm));
3146                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3147                         break;
3148                 case OP_LSHR_IMM:
3149                         g_assert (amd64_is_imm32 (ins->inst_imm));
3150                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3151                         break;
3152                 case OP_SHR_UN_IMM:
3153                         g_assert (amd64_is_imm32 (ins->inst_imm));
3154                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3155                         break;
3156                 case OP_LSHR_UN_IMM:
3157                         g_assert (amd64_is_imm32 (ins->inst_imm));
3158                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3159                         break;
3160                 case CEE_SHR_UN:
3161                         g_assert (ins->sreg2 == AMD64_RCX);
3162                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3163                         break;
3164                 case OP_LSHR_UN:
3165                         g_assert (ins->sreg2 == AMD64_RCX);
3166                         amd64_shift_reg (code, X86_SHR, ins->dreg);
3167                         break;
3168                 case OP_SHL_IMM:
3169                         g_assert (amd64_is_imm32 (ins->inst_imm));
3170                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3171                         break;
3172                 case OP_LSHL_IMM:
3173                         g_assert (amd64_is_imm32 (ins->inst_imm));
3174                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3175                         break;
3176
3177                 case OP_IADDCC:
3178                 case OP_IADD:
3179                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
3180                         break;
3181                 case OP_IADC:
3182                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
3183                         break;
3184                 case OP_IADD_IMM:
3185                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
3186                         break;
3187                 case OP_IADC_IMM:
3188                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
3189                         break;
3190                 case OP_ISUBCC:
3191                 case OP_ISUB:
3192                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
3193                         break;
3194                 case OP_ISBB:
3195                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
3196                         break;
3197                 case OP_ISUB_IMM:
3198                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
3199                         break;
3200                 case OP_ISBB_IMM:
3201                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
3202                         break;
3203                 case OP_IAND:
3204                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
3205                         break;
3206                 case OP_IAND_IMM:
3207                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
3208                         break;
3209                 case OP_IOR:
3210                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
3211                         break;
3212                 case OP_IOR_IMM:
3213                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
3214                         break;
3215                 case OP_IXOR:
3216                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
3217                         break;
3218                 case OP_IXOR_IMM:
3219                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
3220                         break;
3221                 case OP_INEG:
3222                         amd64_neg_reg_size (code, ins->sreg1, 4);
3223                         break;
3224                 case OP_INOT:
3225                         amd64_not_reg_size (code, ins->sreg1, 4);
3226                         break;
3227                 case OP_ISHL:
3228                         g_assert (ins->sreg2 == AMD64_RCX);
3229                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
3230                         break;
3231                 case OP_ISHR:
3232                         g_assert (ins->sreg2 == AMD64_RCX);
3233                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
3234                         break;
3235                 case OP_ISHR_IMM:
3236                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3237                         break;
3238                 case OP_ISHR_UN_IMM:
3239                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3240                         break;
3241                 case OP_ISHR_UN:
3242                         g_assert (ins->sreg2 == AMD64_RCX);
3243                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3244                         break;
3245                 case OP_ISHL_IMM:
3246                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3247                         break;
3248                 case OP_IMUL:
3249                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3250                         break;
3251                 case OP_IMUL_IMM:
3252                         amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, 4);
3253                         break;
3254                 case OP_IMUL_OVF:
3255                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3256                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3257                         break;
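                /*
                 * Note: the X86_EAX/X86_EDX constants used below have the same
                 * numeric encodings as AMD64_RAX/AMD64_RDX, so they name the same
                 * hard registers.  Unsigned mul sets the overflow flag exactly when
                 * the high half of the result is non-zero, which is what the
                 * exception check relies on.
                 */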
3258                 case OP_IMUL_OVF_UN: {
3259                         /* the mul operation and the exception check should most likely be split */
3260                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3261                         /*g_assert (ins->sreg2 == X86_EAX);
3262                         g_assert (ins->dreg == X86_EAX);*/
3263                         if (ins->sreg2 == X86_EAX) {
3264                                 non_eax_reg = ins->sreg1;
3265                         } else if (ins->sreg1 == X86_EAX) {
3266                                 non_eax_reg = ins->sreg2;
3267                         } else {
3268                                 /* no need to save since we're going to store to it anyway */
3269                                 if (ins->dreg != X86_EAX) {
3270                                         saved_eax = TRUE;
3271                                         amd64_push_reg (code, X86_EAX);
3272                                 }
3273                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3274                                 non_eax_reg = ins->sreg2;
3275                         }
3276                         if (ins->dreg == X86_EDX) {
3277                                 if (!saved_eax) {
3278                                         saved_eax = TRUE;
3279                                         amd64_push_reg (code, X86_EAX);
3280                                 }
3281                         } else if (ins->dreg != X86_EAX) {
3282                                 saved_edx = TRUE;
3283                                 amd64_push_reg (code, X86_EDX);
3284                         }
3285                         amd64_mul_reg_size (code, non_eax_reg, FALSE, 4);
3286                         /* save before the check since pop and mov don't change the flags */
3287                         if (ins->dreg != X86_EAX)
3288                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3289                         if (saved_edx)
3290                                 amd64_pop_reg (code, X86_EDX);
3291                         if (saved_eax)
3292                                 amd64_pop_reg (code, X86_EAX);
3293                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3294                         break;
3295                 }
3296                 case OP_IDIV:
3297                         amd64_cdq_size (code, 4);
3298                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3299                         break;
3300                 case OP_IDIV_UN:
3301                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3302                         amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
3303                         break;
3304                 case OP_IDIV_IMM:
3305                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3306                         amd64_cdq_size (code, 4);
3307                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3308                         break;
3309                 case OP_IREM:
3310                         amd64_cdq_size (code, 4);
3311                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3312                         break;
3313                 case OP_IREM_UN:
3314                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3315                         amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
3316                         break;
3317                 case OP_IREM_IMM:
3318                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3319                         amd64_cdq_size (code, 4);
3320                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3321                         break;
3322
3323                 case OP_ICOMPARE:
3324                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3325                         break;
3326                 case OP_ICOMPARE_IMM:
3327                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
3328                         break;
3329
3330                 case OP_IBEQ:
3331                 case OP_IBLT:
3332                 case OP_IBGT:
3333                 case OP_IBGE:
3334                 case OP_IBLE:
3335                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), TRUE);
3336                         break;
3337                 case OP_IBNE_UN:
3338                 case OP_IBLT_UN:
3339                 case OP_IBGT_UN:
3340                 case OP_IBGE_UN:
3341                 case OP_IBLE_UN:
3342                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), FALSE);
3343                         break;
3344                 case OP_COND_EXC_IOV:
3345                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
3346                                                                                 TRUE, ins->inst_p1);
3347                         break;
3348                 case OP_COND_EXC_IC:
3349                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
3350                                                                                 FALSE, ins->inst_p1);
3351                         break;
3352                 case CEE_NOT:
3353                         amd64_not_reg (code, ins->sreg1);
3354                         break;
3355                 case CEE_NEG:
3356                         amd64_neg_reg (code, ins->sreg1);
3357                         break;
3358                 case OP_SEXT_I1:
3359                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3360                         break;
3361                 case OP_SEXT_I2:
3362                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3363                         break;
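                /*
                 * Writing a 32 bit register zero-extends into the full 64 bits, so
                 * constants whose upper half is zero get the shorter encoding with
                 * a 4 byte immediate.
                 */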
3364                 case OP_ICONST:
3365                 case OP_I8CONST:
3366                         if ((((guint64)ins->inst_c0) >> 32) == 0)
3367                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
3368                         else
3369                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
3370                         break;
3371                 case OP_AOTCONST:
3372                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3373                         amd64_set_reg_template (code, ins->dreg);
3374                         break;
3375                 case CEE_CONV_I4:
3376                 case CEE_CONV_U4:
3377                 case OP_MOVE:
3378                 case OP_SETREG:
3379                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
3380                         break;
3381                 case OP_AMD64_SET_XMMREG_R4: {
3382                         /* FIXME: optimize this */
3383                         amd64_fst_membase (code, AMD64_RSP, -8, FALSE, TRUE);
3384                         /* ins->dreg is set to -1 by the reg allocator */
3385                         amd64_movss_reg_membase (code, ins->unused, AMD64_RSP, -8);
3386                         break;
3387                 }
3388                 case OP_AMD64_SET_XMMREG_R8: {
3389                         /* FIXME: optimize this */
3390                         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE);
3391                         /* ins->dreg is set to -1 by the reg allocator */
3392                         amd64_movsd_reg_membase (code, ins->unused, AMD64_RSP, -8);
3393                         break;
3394                 }
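                /*
                 * CEE_JMP implements a tail call: restore the callee-saved
                 * registers, tear down the frame and jump.  R11 holds the target
                 * since it is call-clobbered and not an argument register in the
                 * SysV ABI, so it is free at this point.
                 */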
3395                 case CEE_JMP: {
3396                         /*
3397                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3398                          * Keep in sync with the code in emit_epilog.
3399                          */
3400                         int pos = 0, i;
3401
3402                         /* FIXME: no tracing support... */
3403                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3404                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3405
3406                         g_assert (!cfg->method->save_lmf);
3407
3408                         code = emit_load_volatile_arguments (cfg, code);
3409
3410                         for (i = 0; i < AMD64_NREG; ++i)
3411                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
3412                                         pos -= sizeof (gpointer);
3413                         
3414                         if (pos)
3415                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
3416
3417                         /* Pop registers in reverse order */
3418                         for (i = AMD64_NREG - 1; i > 0; --i)
3419                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3420                                         amd64_pop_reg (code, i);
3421                                 }
3422
3423                         amd64_leave (code);
3424                         offset = code - cfg->native_code;
3425                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3426                         amd64_set_reg_template (code, AMD64_R11);
3427                         amd64_jump_reg (code, AMD64_R11);
3428                         break;
3429                 }
3430                 case OP_CHECK_THIS:
3431                         /* ensure ins->sreg1 is not NULL */
3432                         amd64_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
3433                         break;
3434                 case OP_ARGLIST: {
3435                         int hreg = ins->sreg1 == AMD64_RAX? AMD64_RCX: AMD64_RAX;
3436                         amd64_push_reg (code, hreg);
3437                         amd64_lea_membase (code, hreg, AMD64_RBP, cfg->sig_cookie);
3438                         amd64_mov_membase_reg (code, ins->sreg1, 0, hreg, 8);
3439                         amd64_pop_reg (code, hreg);
3440                         break;
3441                 }
3442                 case OP_FCALL:
3443                 case OP_LCALL:
3444                 case OP_VCALL:
3445                 case OP_VOIDCALL:
3446                 case CEE_CALL:
3447                         call = (MonoCallInst*)ins;
3448                         /*
3449                          * The AMD64 ABI forces callers to know about varargs.
3450                          */
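                             /* %al must hold an upper bound on the number of SSE registers used by a varargs call; it is cleared here. */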
3451                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
3452                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3453
3454                         if (ins->flags & MONO_INST_HAS_METHOD)
3455                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3456                         else
3457                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3458                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3459                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3460                         code = emit_move_return_value (cfg, ins, code);
3461                         break;
3462                 case OP_FCALL_REG:
3463                 case OP_LCALL_REG:
3464                 case OP_VCALL_REG:
3465                 case OP_VOIDCALL_REG:
3466                 case OP_CALL_REG:
3467                         call = (MonoCallInst*)ins;
3468
3469                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3470                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3471                                 ins->sreg1 = AMD64_R11;
3472                         }
3473
3474                         /*
3475                          * The AMD64 ABI forces callers to know about varargs.
3476                          */
3477                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
3478                                 if (ins->sreg1 == AMD64_RAX) {
3479                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
3480                                         ins->sreg1 = AMD64_R11;
3481                                 }
3482                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3483                         }
3484                         amd64_call_reg (code, ins->sreg1);
3485                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3486                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3487                         code = emit_move_return_value (cfg, ins, code);
3488                         break;
3489                 case OP_FCALL_MEMBASE:
3490                 case OP_LCALL_MEMBASE:
3491                 case OP_VCALL_MEMBASE:
3492                 case OP_VOIDCALL_MEMBASE:
3493                 case OP_CALL_MEMBASE:
3494                         call = (MonoCallInst*)ins;
3495
3496                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3497                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3498                                 ins->sreg1 = AMD64_R11;
3499                         }
3500
3501                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
3502                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3503                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3504                         code = emit_move_return_value (cfg, ins, code);
3505                         break;
3506                 case OP_OUTARG:
3507                 case OP_X86_PUSH:
3508                         amd64_push_reg (code, ins->sreg1);
3509                         break;
3510                 case OP_X86_PUSH_IMM:
3511                         g_assert (amd64_is_imm32 (ins->inst_imm));
3512                         amd64_push_imm (code, ins->inst_imm);
3513                         break;
3514                 case OP_X86_PUSH_MEMBASE:
3515                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
3516                         break;
3517                 case OP_X86_PUSH_OBJ: 
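                             /* Push a value type: reserve inst_imm bytes, then copy the object there with rep movs in 8 byte units, preserving %rdi/%rsi/%rcx around the copy. */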
3518                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
3519                         amd64_push_reg (code, AMD64_RDI);
3520                         amd64_push_reg (code, AMD64_RSI);
3521                         amd64_push_reg (code, AMD64_RCX);
3522                         if (ins->inst_offset)
3523                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
3524                         else
3525                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
3526                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
3527                         amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
3528                         amd64_cld (code);
3529                         amd64_prefix (code, X86_REP_PREFIX);
3530                         amd64_movsd (code);
3531                         amd64_pop_reg (code, AMD64_RCX);
3532                         amd64_pop_reg (code, AMD64_RSI);
3533                         amd64_pop_reg (code, AMD64_RDI);
3534                         break;
3535                 case OP_X86_LEA:
3536                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3537                         break;
3538                 case OP_X86_LEA_MEMBASE:
3539                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3540                         break;
3541                 case OP_X86_XCHG:
3542                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3543                         break;
3544                 case OP_LOCALLOC:
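                             /* Round the requested size up to MONO_ARCH_FRAME_ALIGNMENT, allocate it from the stack and return the new %rsp. */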
3545                         /* keep alignment */
3546                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3547                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3548                         code = mono_emit_stack_alloc (code, ins);
3549                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3550                         break;
3551                 case CEE_RET:
3552                         amd64_ret (code);
3553                         break;
3554                 case CEE_THROW: {
3555                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
3556                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3557                                              (gpointer)"mono_arch_throw_exception");
3558                         break;
3559                 }
3560                 case OP_CALL_HANDLER: 
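                             /* Invoke the handler block with a call so it can return with ret; the extra 8 bytes keep %rsp 16 byte aligned at the call. */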
3561                         /* Align stack */
3562                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
3563                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3564                         amd64_call_imm (code, 0);
3565                         /* Restore stack alignment */
3566                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3567                         break;
3568                 case OP_LABEL:
3569                         ins->inst_c0 = code - cfg->native_code;
3570                         break;
3571                 case CEE_BR:
3572                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3573                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3574                         //break;
3575                         if (ins->flags & MONO_INST_BRLABEL) {
3576                                 if (ins->inst_i0->inst_c0) {
3577                                         amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3578                                 } else {
3579                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3580                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3581                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3582                                                 x86_jump8 (code, 0);
3583                                         else 
3584                                                 x86_jump32 (code, 0);
3585                                 }
3586                         } else {
3587                                 if (ins->inst_target_bb->native_offset) {
3588                                         amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3589                                 } else {
3590                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3591                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3592                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3593                                                 x86_jump8 (code, 0);
3594                                         else 
3595                                                 x86_jump32 (code, 0);
3596                                 } 
3597                         }
3598                         break;
3599                 case OP_BR_REG:
3600                         amd64_jump_reg (code, ins->sreg1);
3601                         break;
3602                 case OP_CEQ:
3603                 case OP_ICEQ:
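                             /* setcc only writes the low byte of dreg, hence the widen afterwards */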
3604                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3605                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3606                         break;
3607                 case OP_CLT:
3608                 case OP_ICLT:
3609                         amd64_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3610                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3611                         break;
3612                 case OP_CLT_UN:
3613                 case OP_ICLT_UN:
3614                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3615                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3616                         break;
3617                 case OP_CGT:
3618                 case OP_ICGT:
3619                         amd64_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3620                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3621                         break;
3622                 case OP_CGT_UN:
3623                 case OP_ICGT_UN:
3624                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3625                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3626                         break;
3627                 case OP_COND_EXC_EQ:
3628                 case OP_COND_EXC_NE_UN:
3629                 case OP_COND_EXC_LT:
3630                 case OP_COND_EXC_LT_UN:
3631                 case OP_COND_EXC_GT:
3632                 case OP_COND_EXC_GT_UN:
3633                 case OP_COND_EXC_GE:
3634                 case OP_COND_EXC_GE_UN:
3635                 case OP_COND_EXC_LE:
3636                 case OP_COND_EXC_LE_UN:
3637                 case OP_COND_EXC_OV:
3638                 case OP_COND_EXC_NO:
3639                 case OP_COND_EXC_C:
3640                 case OP_COND_EXC_NC:
3641                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3642                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3643                         break;
3644                 case CEE_BEQ:
3645                 case CEE_BNE_UN:
3646                 case CEE_BLT:
3647                 case CEE_BLT_UN:
3648                 case CEE_BGT:
3649                 case CEE_BGT_UN:
3650                 case CEE_BGE:
3651                 case CEE_BGE_UN:
3652                 case CEE_BLE:
3653                 case CEE_BLE_UN:
3654                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3655                         break;
3656
3657                 /* floating point opcodes */
3658                 case OP_R8CONST: {
3659                         double d = *(double *)ins->inst_p0;
3660
3661                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3662                                 amd64_fldz (code);
3663                         } else if (d == 1.0) {
3664                                 x86_fld1 (code);
3665                         } else {
3666                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
3667                                 amd64_fld_membase (code, AMD64_RIP, 0, TRUE);
3668                         }
3669                         break;
3670                 }
3671                 case OP_R4CONST: {
3672                         float f = *(float *)ins->inst_p0;
3673
3674                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3675                                 amd64_fldz (code);
3676                         } else if (f == 1.0) {
3677                                 x86_fld1 (code);
3678                         } else {
3679                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
3680                                 amd64_fld_membase (code, AMD64_RIP, 0, FALSE);
3681                         }
3682                         break;
3683                 }
3684                 case OP_STORER8_MEMBASE_REG:
3685                         amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3686                         break;
3687                 case OP_LOADR8_SPILL_MEMBASE:
3688                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3689                         amd64_fxch (code, 1);
3690                         break;
3691                 case OP_LOADR8_MEMBASE:
3692                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3693                         break;
3694                 case OP_STORER4_MEMBASE_REG:
3695                         amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3696                         break;
3697                 case OP_LOADR4_MEMBASE:
3698                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3699                         break;
3700                 case CEE_CONV_R4: /* FIXME: change precision */
3701                 case CEE_CONV_R8:
3702                         amd64_push_reg (code, ins->sreg1);
3703                         amd64_fild_membase (code, AMD64_RSP, 0, FALSE);
3704                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3705                         break;
3706                 case CEE_CONV_R_UN:
3707                         /* Emulated */
3708                         g_assert_not_reached ();
3709                         break;
3710                 case OP_LCONV_TO_R4: /* FIXME: change precision */
3711                 case OP_LCONV_TO_R8:
3712                         amd64_push_reg (code, ins->sreg1);
3713                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3714                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3715                         break;
3716                 case OP_X86_FP_LOAD_I8:
3717                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3718                         break;
3719                 case OP_X86_FP_LOAD_I4:
3720                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3721                         break;
3722                 case OP_FCONV_TO_I1:
3723                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3724                         break;
3725                 case OP_FCONV_TO_U1:
3726                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3727                         break;
3728                 case OP_FCONV_TO_I2:
3729                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3730                         break;
3731                 case OP_FCONV_TO_U2:
3732                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3733                         break;
3734                 case OP_FCONV_TO_I4:
3735                 case OP_FCONV_TO_I:
3736                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3737                         break;
3738                 case OP_FCONV_TO_I8:
3739                         code = emit_float_to_int (cfg, code, ins->dreg, 8, TRUE);
3740                         break;
3741                 case OP_LCONV_TO_R_UN: { 
3742                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
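                             /* mn is 2^64 as an 80 bit extended value: fild treats its operand as signed, so when the top bit is set the loaded value is 2^64 too small and must be corrected. */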
3743                         guint8 *br;
3744
3745                         /* load 64bit integer to FP stack */
3746                         amd64_push_imm (code, 0);
3747                         amd64_push_reg (code, ins->sreg2);
3748                         amd64_push_reg (code, ins->sreg1);
3749                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3750                         /* store as 80bit FP value */
3751                         x86_fst80_membase (code, AMD64_RSP, 0);
3752                         
3753                         /* test if lreg is negative */
3754                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3755                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3756         
3757                         /* add correction constant mn */
3758                         x86_fld80_mem (code, mn);
3759                         x86_fld80_membase (code, AMD64_RSP, 0);
3760                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3761                         x86_fst80_membase (code, AMD64_RSP, 0);
3762
3763                         amd64_patch (br, code);
3764
3765                         x86_fld80_membase (code, AMD64_RSP, 0);
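                             /* FIXME: the three pushes above reserve 24 bytes on amd64, yet only 12 are released here; this looks inherited from the 4 byte pushes of the x86 port. */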
3766                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 12);
3767
3768                         break;
3769                 }
3770                 case OP_LCONV_TO_OVF_I: {
3771                         guint8 *br [3], *label [1];
3772
3773                         /* 
3774                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3775                          */
3776                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3777
3778                         /* If the low word top bit is set, see if we are negative */
3779                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3780                         /* We are not negative (no top bit set), so check that the top word is zero */
3781                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3782                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3783                         label [0] = code;
3784
3785                         /* throw exception */
3786                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3787                         x86_jump32 (code, 0);
3788         
3789                         amd64_patch (br [0], code);
3790                         /* our top bit is set, check that the top word is 0xffffffff */
3791                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3792                 
3793                         amd64_patch (br [1], code);
3794                         /* nope, emit exception */
3795                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3796                         amd64_patch (br [2], label [0]);
3797
3798                         if (ins->dreg != ins->sreg1)
3799                                 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3800                         break;
3801                 }
3802                 case CEE_CONV_OVF_U4:
3803                         /* FIXME: */
3804                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3805                         break;
3806                 case OP_FADD:
3807                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3808                         break;
3809                 case OP_FSUB:
3810                         amd64_fp_op_reg (code, X86_FSUB, 1, TRUE);
3811                         break;          
3812                 case OP_FMUL:
3813                         amd64_fp_op_reg (code, X86_FMUL, 1, TRUE);
3814                         break;          
3815                 case OP_FDIV:
3816                         amd64_fp_op_reg (code, X86_FDIV, 1, TRUE);
3817                         break;          
3818                 case OP_FNEG:
3819                         amd64_fchs (code);
3820                         break;          
3821                 case OP_SIN:
3822                         amd64_fsin (code);
3823                         amd64_fldz (code);
3824                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3825                         break;          
3826                 case OP_COS:
3827                         amd64_fcos (code);
3828                         amd64_fldz (code);
3829                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3830                         break;          
3831                 case OP_ABS:
3832                         amd64_fabs (code);
3833                         break;          
3834                 case OP_TAN: {
3835                         /* 
3836                          * it really doesn't make sense to inline all this code,
3837                          * it's here just to show that things may not be as simple 
3838                          * as they appear.
3839                          */
3840                         guchar *check_pos, *end_tan, *pop_jump;
3841                         amd64_push_reg (code, AMD64_RAX);
3842                         amd64_fptan (code);
3843                         amd64_fnstsw (code);
3844                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3845                         check_pos = code;
3846                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3847                         amd64_fstp (code, 0); /* pop the 1.0 */
3848                         end_tan = code;
3849                         x86_jump8 (code, 0);
3850                         amd64_fldpi (code);
3851                         amd64_fp_op (code, X86_FADD, 0);
3852                         amd64_fxch (code, 1);
3853                         x86_fprem1 (code);
3854                         amd64_fstsw (code);
3855                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3856                         pop_jump = code;
3857                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3858                         amd64_fstp (code, 1);
3859                         amd64_fptan (code);
3860                         amd64_patch (pop_jump, code);
3861                         amd64_fstp (code, 0); /* pop the 1.0 */
3862                         amd64_patch (check_pos, code);
3863                         amd64_patch (end_tan, code);
3864                         amd64_fldz (code);
3865                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3866                         amd64_pop_reg (code, AMD64_RAX);
3867                         break;
3868                 }
3869                 case OP_ATAN:
3870                         x86_fld1 (code);
3871                         amd64_fpatan (code);
3872                         amd64_fldz (code);
3873                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3874                         break;          
3875                 case OP_SQRT:
3876                         amd64_fsqrt (code);
3877                         break;          
3878                 case OP_X86_FPOP:
3879                         amd64_fstp (code, 0);
3880                         break;          
3881                 case OP_FREM: {
3882                         guint8 *l1, *l2;
3883
3884                         amd64_push_reg (code, AMD64_RAX);
3885                         /* we need to exchange ST(0) with ST(1) */
3886                         amd64_fxch (code, 1);
3887
3888                         /* this requires a loop, because fprem sometimes 
3889                          * returns a partial remainder */
3890                         l1 = code;
3891                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3892                         /* x86_fprem1 (code); */
3893                         amd64_fprem (code);
3894                         amd64_fnstsw (code);
3895                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2);
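                             /* x86_branch8 is two bytes long, so the backward displacement to l1 is computed against code + 2 */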
3896                         l2 = code + 2;
3897                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3898
3899                         /* pop result */
3900                         amd64_fstp (code, 1);
3901
3902                         amd64_pop_reg (code, AMD64_RAX);
3903                         break;
3904                 }
3905                 case OP_FCOMPARE:
3906                         if (cfg->opt & MONO_OPT_FCMOV) {
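                                     /* fcomip compares ST(0) with ST(1), sets EFLAGS and pops once; the fstp below pops the remaining operand */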
3907                                 amd64_fcomip (code, 1);
3908                                 amd64_fstp (code, 0);
3909                                 break;
3910                         }
3911                         /* this overwrites %rax */
3912                         EMIT_FPCOMPARE(code);
3913                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3914                         break;
3915                 case OP_FCEQ:
3916                         if (cfg->opt & MONO_OPT_FCMOV) {
3917                                 /* zeroing the register at the start results in 
3918                                  * shorter and faster code (we can also remove the widening op)
3919                                  */
3920                                 guchar *unordered_check;
3921                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3922                                 amd64_fcomip (code, 1);
3923                                 amd64_fstp (code, 0);
3924                                 unordered_check = code;
3925                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3926                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3927                                 amd64_patch (unordered_check, code);
3928                                 break;
3929                         }
3930                         if (ins->dreg != AMD64_RAX) 
3931                                 amd64_push_reg (code, AMD64_RAX);
3932
3933                         EMIT_FPCOMPARE(code);
3934                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3935                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3936                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3937                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3938
3939                         if (ins->dreg != AMD64_RAX) 
3940                                 amd64_pop_reg (code, AMD64_RAX);
3941                         break;
3942                 case OP_FCLT:
3943                 case OP_FCLT_UN:
3944                         if (cfg->opt & MONO_OPT_FCMOV) {
3945                                 /* zeroing the register at the start results in 
3946                                  * shorter and faster code (we can also remove the widening op)
3947                                  */
3948                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3949                                 amd64_fcomip (code, 1);
3950                                 amd64_fstp (code, 0);
3951                                 if (ins->opcode == OP_FCLT_UN) {
3952                                         guchar *unordered_check = code;
3953                                         guchar *jump_to_end;
3954                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3955                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3956                                         jump_to_end = code;
3957                                         x86_jump8 (code, 0);
3958                                         amd64_patch (unordered_check, code);
3959                                         amd64_inc_reg (code, ins->dreg);
3960                                         amd64_patch (jump_to_end, code);
3961                                 } else {
3962                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3963                                 }
3964                                 break;
3965                         }
3966                         if (ins->dreg != AMD64_RAX) 
3967                                 amd64_push_reg (code, AMD64_RAX);
3968
3969                         EMIT_FPCOMPARE(code);
3970                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3971                         if (ins->opcode == OP_FCLT_UN) {
3972                                 guchar *is_not_zero_check, *end_jump;
3973                                 is_not_zero_check = code;
3974                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3975                                 end_jump = code;
3976                                 x86_jump8 (code, 0);
3977                                 amd64_patch (is_not_zero_check, code);
3978                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
3979
3980                                 amd64_patch (end_jump, code);
3981                         }
3982                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3983                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3984
3985                         if (ins->dreg != AMD64_RAX) 
3986                                 amd64_pop_reg (code, AMD64_RAX);
3987                         break;
3988                 case OP_FCGT:
3989                 case OP_FCGT_UN:
3990                         if (cfg->opt & MONO_OPT_FCMOV) {
3991                                 /* zeroing the register at the start results in 
3992                                  * shorter and faster code (we can also remove the widening op)
3993                                  */
3994                                 guchar *unordered_check;
3995                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3996                                 amd64_fcomip (code, 1);
3997                                 amd64_fstp (code, 0);
3998                                 if (ins->opcode == OP_FCGT) {
3999                                         unordered_check = code;
4000                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
4001                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4002                                         amd64_patch (unordered_check, code);
4003                                 } else {
4004                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4005                                 }
4006                                 break;
4007                         }
4008                         if (ins->dreg != AMD64_RAX) 
4009                                 amd64_push_reg (code, AMD64_RAX);
4010
4011                         EMIT_FPCOMPARE(code);
4012                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
4013                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4014                         if (ins->opcode == OP_FCGT_UN) {
4015                                 guchar *is_not_zero_check, *end_jump;
4016                                 is_not_zero_check = code;
4017                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4018                                 end_jump = code;
4019                                 x86_jump8 (code, 0);
4020                                 amd64_patch (is_not_zero_check, code);
4021                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4022
4023                                 amd64_patch (end_jump, code);
4024                         }
4025                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
4026                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4027
4028                         if (ins->dreg != AMD64_RAX) 
4029                                 amd64_pop_reg (code, AMD64_RAX);
4030                         break;
4031                 case OP_FBEQ:
4032                         if (cfg->opt & MONO_OPT_FCMOV) {
4033                                 guchar *jump = code;
4034                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
4035                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4036                                 amd64_patch (jump, code);
4037                                 break;
4038                         }
4039                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
4040                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
4041                         break;
4042                 case OP_FBNE_UN:
4043                         /* Branch if C013 != 100 */
4044                         if (cfg->opt & MONO_OPT_FCMOV) {
4045                                 /* branch if !ZF or (PF|CF) */
4046                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4047                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4048                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
4049                                 break;
4050                         }
4051                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
4052                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4053                         break;
4054                 case OP_FBLT:
4055                         if (cfg->opt & MONO_OPT_FCMOV) {
4056                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4057                                 break;
4058                         }
4059                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4060                         break;
4061                 case OP_FBLT_UN:
4062                         if (cfg->opt & MONO_OPT_FCMOV) {
4063                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4064                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4065                                 break;
4066                         }
4067                         if (ins->opcode == OP_FBLT_UN) {
4068                                 guchar *is_not_zero_check, *end_jump;
4069                                 is_not_zero_check = code;
4070                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4071                                 end_jump = code;
4072                                 x86_jump8 (code, 0);
4073                                 amd64_patch (is_not_zero_check, code);
4074                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4075
4076                                 amd64_patch (end_jump, code);
4077                         }
4078                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4079                         break;
4080                 case OP_FBGT:
4081                 case OP_FBGT_UN:
4082                         if (cfg->opt & MONO_OPT_FCMOV) {
4083                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
4084                                 break;
4085                         }
4086                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4087                         if (ins->opcode == OP_FBGT_UN) {
4088                                 guchar *is_not_zero_check, *end_jump;
4089                                 is_not_zero_check = code;
4090                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4091                                 end_jump = code;
4092                                 x86_jump8 (code, 0);
4093                                 amd64_patch (is_not_zero_check, code);
4094                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4095
4096                                 amd64_patch (end_jump, code);
4097                         }
4098                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4099                         break;
4100                 case OP_FBGE:
4101                         /* Branch if C013 == 100 or 001 */
4102                         if (cfg->opt & MONO_OPT_FCMOV) {
4103                                 guchar *br1;
4104
4105                                 /* skip branch if C1=1 */
4106                                 br1 = code;
4107                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4108                                 /* branch if (C0 | C3) = 1 */
4109                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
4110                                 amd64_patch (br1, code);
4111                                 break;
4112                         }
4113                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4114                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4115                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
4116                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4117                         break;
4118                 case OP_FBGE_UN:
4119                         /* Branch if C013 == 000 */
4120                         if (cfg->opt & MONO_OPT_FCMOV) {
4121                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
4122                                 break;
4123                         }
4124                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4125                         break;
4126                 case OP_FBLE:
4127                         /* Branch if C013=000 or 100 */
4128                         if (cfg->opt & MONO_OPT_FCMOV) {
4129                                 guchar *br1;
4130
4131                                 /* skip branch if C1=1 */
4132                                 br1 = code;
4133                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4134                                 /* branch if C0=0 */
4135                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
4136                                 amd64_patch (br1, code);
4137                                 break;
4138                         }
4139                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1));
4140                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4141                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4142                         break;
4143                 case OP_FBLE_UN:
4144                         /* Branch if C013 != 001 */
4145                         if (cfg->opt & MONO_OPT_FCMOV) {
4146                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4147                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4148                                 break;
4149                         }
4150                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4151                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4152                         break;
4153                 case CEE_CKFINITE: {
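                             /* fxam classifies ST(0); after masking C3|C0 the result equals C0 exactly for NaNs and infinities, which must raise an ArithmeticException */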
4154                         amd64_push_reg (code, AMD64_RAX);
4155                         amd64_fxam (code);
4156                         amd64_fnstsw (code);
4157                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
4158                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4159                         amd64_pop_reg (code, AMD64_RAX);
4160                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4161                         break;
4162                 }
4163                 case OP_X86_TLS_GET: {
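                             /* Read a thread-local slot with an %fs relative load */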
4164                         x86_prefix (code, X86_FS_PREFIX);
4165                         amd64_mov_reg_mem (code, ins->dreg, ins->inst_offset, 8);
4166                         break;
4167                 }
4168                 default:
4169                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4170                         g_assert_not_reached ();
4171                 }
4172
4173                 if ((code - cfg->native_code - offset) > max_len) {
4174                         g_warning ("wrong maximum instruction length for instruction %s (expected %d, got %ld)",
4175                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4176                         g_assert_not_reached ();
4177                 }
4178                
4179                 cpos += max_len;
4180
4181                 last_ins = ins;
4182                 last_offset = offset;
4183                 
4184                 ins = ins->next;
4185         }
4186
4187         cfg->code_len = code - cfg->native_code;
4188 }
4189
4190 void
4191 mono_arch_register_lowlevel_calls (void)
4192 {
4193 }
4194
4195 void
4196 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4197 {
4198         MonoJumpInfo *patch_info;
4199
4200         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4201                 unsigned char *ip = patch_info->ip.i + code;
4202                 const unsigned char *target;
4203
4204                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4205
4206                 switch (patch_info->type) {
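                     /* Targets emitted through amd64_set_reg_template (REX.W + mov r64, imm64) keep their 64 bit immediate at ip + 2. */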
4207                 case MONO_PATCH_INFO_METHOD_REL:
4208                 case MONO_PATCH_INFO_METHOD_JUMP:
4209                         *((gconstpointer *)(ip + 2)) = target;
4210                         continue;
4211                 case MONO_PATCH_INFO_SWITCH: {
4212                         *((gconstpointer *)(ip + 2)) = target;
4213                         continue;
4214                 }
4215                 case MONO_PATCH_INFO_IID:
4216                         *((guint32 *)(ip + 2)) = (guint32)(guint64)target;
4217                         continue;                       
4218                 case MONO_PATCH_INFO_CLASS_INIT: {
4219                         /* FIXME: Might already have been changed to a nop */
4220                         *((gconstpointer *)(ip + 2)) = target;
4221                         continue;
4222                 }
4223                 case MONO_PATCH_INFO_R8:
4224                 case MONO_PATCH_INFO_R4:
4225                         g_assert_not_reached ();
4226                         continue;
4227                 case MONO_PATCH_INFO_METHODCONST:
4228                 case MONO_PATCH_INFO_CLASS:
4229                 case MONO_PATCH_INFO_IMAGE:
4230                 case MONO_PATCH_INFO_FIELD:
4231                 case MONO_PATCH_INFO_VTABLE:
4232                 case MONO_PATCH_INFO_SFLDA:
4233                 case MONO_PATCH_INFO_EXC_NAME:
4234                 case MONO_PATCH_INFO_LDSTR:
4235                 case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
4236                 case MONO_PATCH_INFO_LDTOKEN:
4237                 case MONO_PATCH_INFO_IP:
4238                         *((gconstpointer *)(ip + 2)) = target;
4239                         continue;
4240                 case MONO_PATCH_INFO_METHOD:
4241                         *((gconstpointer *)(ip + 2)) = target;
4242                         continue;
4243                 case MONO_PATCH_INFO_ABS:
4244                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4245                         break;
4246                 default:
4247                         break;
4248                 }
4249                 amd64_patch (ip, (gpointer)target);
4250         }
4251 }
4252
4253 guint8 *
4254 mono_arch_emit_prolog (MonoCompile *cfg)
4255 {
4256         MonoMethod *method = cfg->method;
4257         MonoBasicBlock *bb;
4258         MonoMethodSignature *sig;
4259         MonoInst *inst;
4260         int alloc_size, pos, max_offset, i;
4261         guint8 *code;
4262         CallInfo *cinfo;
4263
4264         cfg->code_size = MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
4265         code = cfg->native_code = g_malloc (cfg->code_size);
4266
4267         amd64_push_reg (code, AMD64_RBP);
4268         amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
4269
4270         /* Stack alignment check */
4271 #if 0
4272         {
4273                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
4274                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
4275                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4276                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
4277                 amd64_breakpoint (code);
4278         }
4279 #endif
4280
4281         alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
4282         pos = 0;
4283
4284         if (method->save_lmf) {
4285
4286                 pos = ALIGN_TO (pos + sizeof (MonoLMF), 16);
4287
4288                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, pos);
4289
4290                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4291
4292                 /* Save ip */
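                     /* lea 0(%rip) loads the address of the next instruction into %r11 */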
4293                 amd64_lea_membase (code, AMD64_R11, AMD64_RIP, 0);
4294                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, 8);
4295                 /* Save fp */
4296                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), AMD64_RBP, 8);
4297                 /* Save method */
4298                 /* FIXME: add a relocation for this */
4299                 if (IS_IMM32 (cfg->method))
4300                         amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), (guint64)cfg->method, 8);
4301                 else {
4302                         amd64_mov_reg_imm (code, AMD64_R11, cfg->method);
4303                         amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 8);
4304                 }
4305                 /* Save callee saved regs */
4306                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
4307                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
4308                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
4309                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
4310                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
4311         } else {
4312
4313                 for (i = 0; i < AMD64_NREG; ++i)
4314                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4315                                 amd64_push_reg (code, i);
4316                                 pos += sizeof (gpointer);
4317                         }
4318         }
4319
4320         alloc_size -= pos;
4321
4322         if (alloc_size) {
4323                 /* See mono_emit_stack_alloc */
4324 #ifdef PLATFORM_WIN32
4325                 guint32 remaining_size = alloc_size;
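                     /* Commit the new stack pages one at a time: each 0x1000 byte decrement is followed by a touch so the OS guard page can grow the stack. */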
4326                 while (remaining_size >= 0x1000) {
4327                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
4328                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
4329                         remaining_size -= 0x1000;
4330                 }
4331                 if (remaining_size)
4332                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
4333 #else
4334                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
4335 #endif
4336         }
4337
4338         /* compute max_offset in order to use short forward jumps */
4339         max_offset = 0;
4340         if (cfg->opt & MONO_OPT_BRANCH) {
4341                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4342                         MonoInst *ins = bb->code;
4343                         bb->max_offset = max_offset;
4344
4345                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4346                                 max_offset += 6;
4347                         /* max alignment for loops */
4348                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4349                                 max_offset += LOOP_ALIGNMENT;
4350
4351                         while (ins) {
4352                                 if (ins->opcode == OP_LABEL)
4353                                         ins->inst_c1 = max_offset;
4354                                 
4355                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
4356                                 ins = ins->next;
4357                         }
4358                 }
4359         }
4360
4361         sig = method->signature;
4362         pos = 0;
4363
4364         cinfo = get_call_info (sig, FALSE);
4365
4366         if (sig->ret->type != MONO_TYPE_VOID) {
4367                 if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
4368                         /* Save volatile arguments to the stack */
4369                         amd64_mov_membase_reg (code, cfg->ret->inst_basereg, cfg->ret->inst_offset, cinfo->ret.reg, 8);
4370                 }
4371         }
4372
4373         /* Keep this in sync with emit_load_volatile_arguments */
4374         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4375                 ArgInfo *ainfo = cinfo->args + i;
4376                 gint32 stack_offset;
4377                 MonoType *arg_type;
4378                 inst = cfg->varinfo [i];
4379
4380                 if (sig->hasthis && (i == 0))
4381                         arg_type = &mono_defaults.object_class->byval_arg;
4382                 else
4383                         arg_type = sig->params [i - sig->hasthis];
4384
4385                 stack_offset = ainfo->offset + ARGS_OFFSET;
4386
4387                 /* Save volatile arguments to the stack */
4388                 if (inst->opcode != OP_REGVAR) {
4389                         switch (ainfo->storage) {
4390                         case ArgInIReg: {
4391                                 guint32 size = 8;
4392
4393                                 /* FIXME: I1 etc */
4394                                 /*
4395                                 if (stack_offset & 0x1)
4396                                         size = 1;
4397                                 else if (stack_offset & 0x2)
4398                                         size = 2;
4399                                 else if (stack_offset & 0x4)
4400                                         size = 4;
4401                                 else
4402                                         size = 8;
4403                                 */
4404                                 amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg, size);
4405                                 break;
4406                         }
4407                         case ArgInFloatSSEReg:
4408                                 amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
4409                                 break;
4410                         case ArgInDoubleSSEReg:
4411                                 amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
4412                                 break;
4413                         default:
4414                                 break;
4415                         }
4416                 }
4417
4418                 if (inst->opcode == OP_REGVAR) {
4419                         /* Argument allocated to (non-volatile) register */
4420                         switch (ainfo->storage) {
4421                         case ArgInIReg:
4422                                 amd64_mov_reg_reg (code, inst->dreg, ainfo->reg, 8);
4423                                 break;
4424                         case ArgOnStack:
4425                                 amd64_mov_reg_membase (code, inst->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
4426                                 break;
4427                         default:
4428                                 g_assert_not_reached ();
4429                         }
4430                 }
4431         }
4432
4433         if (method->save_lmf) {
4434                 if (lmf_tls_offset != -1) {
4435                         /* Load the lmf quickly using the FS register */
4436                         x86_prefix (code, X86_FS_PREFIX);
4437                         amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
4438                 }
4439                 else {
4440                         /* 
4441                          * The call might clobber argument registers, but they are already
4442                          * saved to the stack/global regs.
4443                          */
4444
4445                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4446                                                                  (gpointer)"mono_get_lmf_addr");                
4447                 }
4448
4449                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4450
4451                 /* Save lmf_addr */
4452                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
4453                 /* Save previous_lmf */
4454                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
4455                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
4456                 /* Set new lmf */
4457                 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset);
4458                 amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
4459         }
4460
4461
4462         g_free (cinfo);
4463
4464         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4465                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4466
4467         cfg->code_len = code - cfg->native_code;
4468
4469         g_assert (cfg->code_len < cfg->code_size);
4470
4471         return code;
4472 }
4473
4474 void
4475 mono_arch_emit_epilog (MonoCompile *cfg)
4476 {
4477         MonoJumpInfo *patch_info;
4478         MonoMethod *method = cfg->method;
4479         int pos, i;
4480         guint8 *code;
4481
4482         code = cfg->native_code + cfg->code_len;
4483
4484         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4485                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4486
4487         /* the code restoring the registers must be kept in sync with CEE_JMP */
4488         pos = 0;
4489         
4490         if (method->save_lmf) {
4491                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4492
4493                 /* Restore previous lmf */
4494                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
4495                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
4496                 amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);
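                /* i.e. *lmf_addr = lmf->previous_lmf: pop this frame's LMF off the per-thread list */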
4497
4498                 /* Restore caller saved regs */
4499                 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
4500                         amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
4501                 }
4502                 if (cfg->used_int_regs & (1 << AMD64_R12)) {
4503                         amd64_mov_reg_membase (code, AMD64_R12, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
4504                 }
4505                 if (cfg->used_int_regs & (1 << AMD64_R13)) {
4506                         amd64_mov_reg_membase (code, AMD64_R13, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
4507                 }
4508                 if (cfg->used_int_regs & (1 << AMD64_R14)) {
4509                         amd64_mov_reg_membase (code, AMD64_R14, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
4510                 }
4511                 if (cfg->used_int_regs & (1 << AMD64_R15)) {
4512                         amd64_mov_reg_membase (code, AMD64_R15, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
4513                 }
4514         } else {
4515
4516                 for (i = 0; i < AMD64_NREG; ++i)
4517                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4518                                 pos -= sizeof (gpointer);
4519
4520                 if (pos) {
4521                         if (pos == - (gint)sizeof (gpointer)) {
4522                                 /* Only one register, so avoid lea */
4523                                 for (i = AMD64_NREG - 1; i > 0; --i)
4524                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4525                                                 amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
4526                                         }
4527                         }
4528                         else {
4529                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4530
4531                                 /* Pop registers in reverse order */
4532                                 for (i = AMD64_NREG - 1; i > 0; --i)
4533                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4534                                                 amd64_pop_reg (code, i);
4535                                         }
4536                         }
4537                 }
4538         }
4539
4540         amd64_leave (code);
4541         amd64_ret (code);
4542
4543         /* add code to raise exceptions */
4544         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4545                 switch (patch_info->type) {
4546                 case MONO_PATCH_INFO_EXC: {
4547                         guint64 offset;
4548
4549                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
4550                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
4551                         amd64_set_reg_template (code, AMD64_RDI);
4552                         /* 7 is the length of the lea below */
4553                         offset = (((guint64)code + 7) - (guint64)cfg->native_code) - (guint64)patch_info->ip.i;
4554                         amd64_lea_membase (code, AMD64_RSI, AMD64_RIP, - offset);
4555                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4556                         patch_info->data.name = "mono_arch_throw_exception_by_name";
4557                         patch_info->ip.i = code - cfg->native_code;
4558                         EMIT_CALL ();
4559                         break;
4560                 }
4561                 default:
4562                         /* do nothing */
4563                         break;
4564                 }
4565         }
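        /*
         * Sketch of the sequence emitted for each EXC patch above:
         *
         *   mov  $<exception name>, %rdi    ; amd64_set_reg_template, filled in
         *                                   ; later through the EXC_NAME patch
         *   lea  -<offset>(%rip), %rsi      ; recover the IP of the faulting code
         *   call mono_arch_throw_exception_by_name
         */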
4566
4567         /* Handle relocations with RIP relative addressing */
4568         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4569                 gboolean remove = FALSE;
4570
4571                 switch (patch_info->type) {
4572                 case MONO_PATCH_INFO_R8: {
4573                         guint8* pos;
4574
4575                         code = (guint8*)ALIGN_TO (code, 8);
4576                         pos = cfg->native_code + patch_info->ip.i;
4577
4578                         *(double*)code = *(double*)patch_info->data.target;
4579
4580                         *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4581                         code += 8;
4582                         remove = TRUE;
4583                         break;
4584                 }
4585                 case MONO_PATCH_INFO_R4: {
4586                         guint8* pos;
4587
4588                         code = (guint8*)ALIGN_TO (code, 8);
4589                         pos = cfg->native_code + patch_info->ip.i;
4590
4591                         *(float*)code = *(float*)patch_info->data.target;
4592
4593                         *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4594                         code += 4;
4595                         remove = TRUE;
4596                         break;
4597                 }
4598                 default:
4599                         break;
4600                 }
4601
4602                 if (remove) {
4603                         if (patch_info == cfg->patch_info)
4604                                 cfg->patch_info = patch_info->next;
4605                         else {
4606                                 MonoJumpInfo *tmp;
4607
4608                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
4609                                         ;
4610                                 tmp->next = patch_info->next;
4611                         }
4612                 }
4613         }
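        /*
         * Each R4/R8 constant is now stored inline after the method code, and the
         * 32 bit displacement of the referencing instruction (assumed to be 7
         * bytes long, with the displacement at offset 3) was rewritten above to
         * reach the constant RIP-relatively, so the patch entry can be dropped.
         */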
4614
4615         cfg->code_len = code - cfg->native_code;
4616
4617         g_assert (cfg->code_len < cfg->code_size);
4618
4619 }
4620
4621 /*
4622  * Allow tracing to work with this interface (with an optional argument)
4623  */
4624
4625 /*
4626  * This may be needed on some archs or for debugging support.
4627  */
4628 void
4629 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
4630 {
4631         /* no stack room needed now (may be needed for FASTCALL-trace support) */
4632         *stack = 0;
4633         /* split prolog-epilog requirements? */
4634         *code = 50; /* max bytes needed: check this number */
4635 }
4636
4637 void*
4638 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4639 {
4640         guchar *code = p;
4641         CallInfo *cinfo;
4642         MonoMethodSignature *sig;
4643         MonoInst *inst;
4644         int i, n, stack_area = 0;
4645
4646         /* Keep this in sync with mono_arch_get_argument_info */
4647
4648         if (enable_arguments) {
4649                 /* Allocate a new area on the stack and save arguments there */
4650                 sig = cfg->method->signature;
4651
4652                 cinfo = get_call_info (sig, FALSE);
4653
4654                 n = sig->param_count + sig->hasthis;
4655
4656                 stack_area = ALIGN_TO (n * 8, 16);
4657
4658                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
4659
4660                 for (i = 0; i < n; ++i) {
4661                         ArgInfo *ainfo = cinfo->args + i;
4662                         gint32 stack_offset;
4663                         MonoType *arg_type;
4664                         inst = cfg->varinfo [i];
4665
4666                         if (sig->hasthis && (i == 0))
4667                                 arg_type = &mono_defaults.object_class->byval_arg;
4668                         else
4669                                 arg_type = sig->params [i - sig->hasthis];
4670
4671                         stack_offset = ainfo->offset + ARGS_OFFSET;
4672
4673                         switch (ainfo->storage) {
4674                         case ArgInIReg:
4675                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg, 8);                                
4676                                 break;
4677                         case ArgInFloatSSEReg:
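                                /* FIXME: movsd stores the whole low quadword; a movss would match R4 arguments */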
4678                                 amd64_movsd_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg);
4679                                 break;
4680                         case ArgInDoubleSSEReg:
4681                                 amd64_movsd_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg);
4682                                 break;
4683                         case ArgOnStack:
4684                                 /* Copy from original stack location to the argument area */
4685                                 /* FIXME: valuetypes etc */
4686                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
4687                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
4688                                 break;
4689                         default:
4690                                 g_assert_not_reached ();
4691                         }
4692                 }
4693         }
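        /*
         * The tracing function is called with the method in %rdi and, when
         * enable_arguments is set, a pointer to the freshly built argument
         * block in %rsi: slot i at %rsp + i*8 holds a copy of argument i.
         */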
4694
4695         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4696         amd64_set_reg_template (code, AMD64_RDI);
4697         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RSP, 8);
4698         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4699
4700         if (enable_arguments) {
4701                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
4702
4703                 g_free (cinfo);
4704         }
4705
4706         return code;
4707 }
4708
4709 enum {
4710         SAVE_NONE,
4711         SAVE_STRUCT,
4712         SAVE_EAX,
4713         SAVE_EAX_EDX,
4714         SAVE_XMM
4715 };
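/*
 * Note: the SAVE_EAX* names are x86 style; on amd64 the integer result is
 * saved from %rax, and SAVE_XMM preserves the floating point result in
 * %xmm0 across the tracing call.
 */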
4716
4717 void*
4718 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4719 {
4720         guchar *code = p;
4721         int save_mode = SAVE_NONE;
4722         MonoMethod *method = cfg->method;
4723         int rtype = method->signature->ret->type;
4724
4725 handle_enum:
4726         switch (rtype) {
4727         case MONO_TYPE_VOID:
4728                 /* special case string .ctor icall */
4729                 if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
4730                         save_mode = SAVE_EAX;
4731                 else
4732                         save_mode = SAVE_NONE;
4733                 break;
4734         case MONO_TYPE_I8:
4735         case MONO_TYPE_U8:
4736                 save_mode = SAVE_EAX;
4737                 break;
4738         case MONO_TYPE_R4:
4739         case MONO_TYPE_R8:
4740                 save_mode = SAVE_XMM;
4741                 break;
4742         case MONO_TYPE_VALUETYPE:
4743                 if (method->signature->ret->data.klass->enumtype) {
4744                         rtype = method->signature->ret->data.klass->enum_basetype->type;
4745                         goto handle_enum;
4746                 }
4747                 save_mode = SAVE_STRUCT;
4748                 break;
4749         default:
4750                 save_mode = SAVE_EAX;
4751                 break;
4752         }
4753
4754         /* Save the result and copy it into the proper argument register */
4755         switch (save_mode) {
4756         case SAVE_EAX:
4757                 amd64_push_reg (code, AMD64_RAX);
4758                 /* Align stack */
4759                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4760                 if (enable_arguments)
4761                         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RAX, 8);
4762                 break;
4763         case SAVE_STRUCT:
4764                 /* FIXME: */
4765                 if (enable_arguments)
4766                         amd64_mov_reg_imm (code, AMD64_RSI, 0);
4767                 break;
4768         case SAVE_XMM:
4769                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4770                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
4771                 /* Align stack */
4772                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4773                 /* 
4774                  * The result is already in the proper argument register so no copying
4775                  * needed.
4776                  */
4777                 break;
4778         case SAVE_NONE:
4779                 break;
4780         default:
4781                 g_assert_not_reached ();
4782         }
4783
4784         /* Set %al to the number of SSE registers used, as required for varargs calls */
4785         if (save_mode == SAVE_XMM)
4786                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
4787         else
4788                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
4789
4790         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
4791         amd64_set_reg_template (code, AMD64_RDI);
4792         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4793
4794         /* Restore result */
4795         switch (save_mode) {
4796         case SAVE_EAX:
4797                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4798                 amd64_pop_reg (code, AMD64_RAX);
4799                 break;
4800         case SAVE_STRUCT:
4801                 /* FIXME: */
4802                 break;
4803         case SAVE_XMM:
4804                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4805                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
4806                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4807                 break;
4808         case SAVE_NONE:
4809                 break;
4810         default:
4811                 g_assert_not_reached ();
4812         }
4813
4814         return code;
4815 }
4816
4817 int
4818 mono_arch_max_epilog_size (MonoCompile *cfg)
4819 {
4820         int max_epilog_size = 16;
4821         MonoJumpInfo *patch_info;
4822         
4823         if (cfg->method->save_lmf)
4824                 max_epilog_size += 256;
4825         
4826         if (mono_jit_trace_calls != NULL)
4827                 max_epilog_size += 50;
4828
4829         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4830                 max_epilog_size += 50;
4831
4832         max_epilog_size += (AMD64_NREG * 2);
4833
4834         /* 
4835          * make sure we have enough space for exceptions
4836          */
4837         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4838                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4839                         max_epilog_size += 40;
4840                 if (patch_info->type == MONO_PATCH_INFO_R8)
4841                         max_epilog_size += 8 + 7; /* sizeof (double) + alignment */
4842                 if (patch_info->type == MONO_PATCH_INFO_R4)
4843                         max_epilog_size += 4 + 7; /* sizeof (float) + alignment */
4844         }
4845
4846         return max_epilog_size;
4847 }
4848
4849 void
4850 mono_arch_flush_icache (guint8 *code, gint size)
4851 {
4852         /* Not needed: amd64 keeps the instruction cache consistent with memory writes */
4853 }
4854
4855 void
4856 mono_arch_flush_register_windows (void)
4857 {
4858 }
4859
4860 gboolean 
4861 mono_arch_is_inst_imm (gint64 imm)
4862 {
4863         return amd64_is_imm32 (imm);
4864 }
4865
4866 #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
4867
4868 static int reg_to_ucontext_reg [] = {
4869         REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
4870         REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
4871         REG_RIP
4872 };
4873
4874 /*
4875  * Determine whether the trap whose info is in SIGINFO is caused by integer
4876  * overflow: idiv traps both for division by zero and for INT64_MIN / -1, so a divisor of -1 identifies the overflow case.
4877  */
4878 gboolean
4879 mono_arch_is_int_overflow (void *sigctx)
4880 {
4881         ucontext_t *ctx = (ucontext_t*)sigctx;
4882         guint8* rip;
4883         int reg;
4884
4885         rip = (guint8*)ctx->uc_mcontext.gregs [REG_RIP];
4886
4887         if (IS_REX (rip [0])) {
4888                 reg = amd64_rex_b (rip [0]); /* REX.B extends the ModRM.rm field, which holds the divisor */
4889                 rip ++;
4890         }
4891         else
4892                 reg = 0;
4893
4894         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
4895                 /* idiv REG */
4896                 reg += x86_modrm_rm (rip [1]);
4897
4898                 if (ctx->uc_mcontext.gregs [reg_to_ucontext_reg [reg]] == -1)
4899                         return TRUE;
4900         }
4901
4902         return FALSE;
4903 }
4904
4905 gpointer*
4906 mono_amd64_get_vcall_slot_addr (guint8* code, guint64 *regs)
4907 {
4908         guint32 reg;
4909         guint32 disp;
4910         guint8 rex = 0;
4911
4912         /* go to the start of the call instruction
4913          *
4914          * address_byte = (m << 6) | (o << 3) | reg
4915          * call opcode: 0xff address_byte displacement
4916          * 0xff m=1,o=2 imm8
4917          * 0xff m=2,o=2 imm32
4918          */
4919         code -= 6;
4920
4921         if (IS_REX (code [3]) && (code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x3)) {
4922                 /* call *%reg */
4923                 return NULL;
4924         }
4925         else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2)) {
4926                 /* call *[reg+disp32] */
4927                 reg = amd64_modrm_rm (code [1]);
4928                 disp = *(guint32*)(code + 2);
4929                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4930         }
4931         else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1)) {
4932                 /* call *[reg+disp8] */
4933                 reg = amd64_modrm_rm (code [4]);
4934                 disp = *(guint8*)(code + 5);
4935                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4936         }
4937         else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x0)) {
4938                 /*
4939                  * This is an interface call: should check whether the cases above can't catch it earlier
4940                  * 8b 40 30   mov    0x30(%eax),%eax
4941                  * ff 10      call   *(%eax)
4942                  */
4943                 reg = amd64_modrm_rm (code [5]);
4944                 disp = 0;
4945         }
4946         else
4947                 g_assert_not_reached ();
4948
4949         reg += amd64_rex_b (rex);
4950
4951         /* FIXME: rex is never read from the instruction stream above, so r8-r15 base registers are not handled */
4952         return (gpointer)((regs [reg]) + disp);
4953 }
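/*
 * Example (illustrative): for "ff 50 30  call *0x30(%rax)" the disp8 case
 * above yields reg = AMD64_RAX and disp = 0x30, so the function returns
 * regs [AMD64_RAX] + 0x30, the address of the vtable slot used by the call.
 */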
4954
4955 /*
4956  * Support for fast access to the thread-local lmf structure using the FS
4957  * segment register on NPTL + kernel 2.6.x.
4958  */
4959
4960 static gboolean tls_offset_inited = FALSE;
4961
4962 /* The code of METHOD should simply be: return <tls var>; */
4963 static int 
4964 read_tls_offset_from_method (void* method)
4965 {
4966         guint8 *code = (guint8*)method;
4967
4968         /* 
4969  * Determine the offset of the thread local variable (e.g. mono_lmf_addr)
4970  * by disassembling the code of METHOD.
4971          */
4972         /* This is generated by gcc 3.3.2 */
4973         if ((code [0] == 0x55) && (code [1] == 0x48) && (code [2] == 0x89) &&
4974                 (code [3] == 0xe5) && (code [4] == 0x64) && (code [5] == 0x48) &&
4975                 (code [6] == 0x8b) && (code [7] == 0x04) && (code [8] == 0x25) &&
4976                 (code [9] == 0x00) && (code [10] == 0x00) && (code [11] == 0x00) &&
4977                 (code [12] == 0x0) && (code [13] == 0x48) && (code [14] == 0x8b) &&
4978                 (code [15] == 0x80)) {
4979                 return *(gint32*)&(code [16]);
4980         } else if 
4981                 /* This is generated by gcc-3.4.1 */
4982                 ((code [0] == 0x55) && (code [1] == 0x48) && (code [2] == 0x89) &&
4983                  (code [3] == 0xe5) && (code [4] == 0x64) && (code [5] == 0x48) &&
4984                  (code [6] == 0x8b) && (code [7] == 0x04) && (code [8] == 0x25)) {
4985                         return *(gint32*)&(code [9]);
4986         } else if
4987                 /* This is generated by gcc-3.4.1 with -O2 */
4988                 ((code [0] == 0x64) && (code [1] == 0x48) && (code [2] == 0x8b) &&
4989                  (code [3] == 0x04) && (code [4] == 0x25)) {
4990                 return *(gint32*)&(code [5]);
4991         }
4992
4993         return -1;
4994 }
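/*
 * For reference, the gcc 3.3.2 pattern matched above disassembles as:
 *
 *   55                          push   %rbp
 *   48 89 e5                    mov    %rsp,%rbp
 *   64 48 8b 04 25 00 00 00 00  mov    %fs:0x0,%rax
 *   48 8b 80 xx xx xx xx        mov    0x<offset>(%rax),%rax
 *
 * the gint32 at offset 16 is the TLS offset of the variable being returned.
 */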
4995
4996 void
4997 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4998 {
4999 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5000         pthread_t self = pthread_self();
5001         pthread_attr_t attr;
5002         void *staddr = NULL;
5003         size_t stsize = 0;
5004         struct sigaltstack sa;
5005 #endif
5006
5007         if (!tls_offset_inited) {
5008                 tls_offset_inited = TRUE;
5009
5010                 lmf_tls_offset = read_tls_offset_from_method (mono_get_lmf_addr);
5011                 appdomain_tls_offset = read_tls_offset_from_method (mono_domain_get);
5012                 //thread_tls_offset = read_tls_offset_from_method (mono_thread_current);
5013         }               
5014
5015 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5016
5017         /* Determine stack boundaries */
5018         if (!mono_running_on_valgrind ()) {
5019 #ifdef HAVE_PTHREAD_GETATTR_NP
5020                 pthread_getattr_np( self, &attr );
5021 #else
5022 #ifdef HAVE_PTHREAD_ATTR_GET_NP
5023                 pthread_attr_get_np( self, &attr );
5024 #elif defined(sun)
5025                 pthread_attr_init( &attr );
5026                 pthread_attr_getstacksize( &attr, &stsize );
5027 #else
5028 #error "Not implemented"
5029 #endif
5030 #endif
5031 #ifndef sun
5032                 pthread_attr_getstack( &attr, &staddr, &stsize );
5033 #endif
5034         }
5035
5036         /* 
5037          * staddr seems to be wrong for the main thread, so we keep the value in
5038          * tls->end_of_stack
5039          */
5040         tls->stack_size = stsize;
5041
5042         /* Setup an alternate signal stack */
5043         tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
5044         tls->signal_stack_size = SIGNAL_STACK_SIZE;
5045
5046         sa.ss_sp = tls->signal_stack;
5047         sa.ss_size = SIGNAL_STACK_SIZE;
5048         sa.ss_flags = 0; /* ss_flags must be 0 or SS_DISABLE when installing; SS_ONSTACK here is non-portable */
5049         sigaltstack (&sa, NULL);
5050 #endif
5051 }
5052
5053 void
5054 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5055 {
5056 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5057         struct sigaltstack sa;
5058
5059         sa.ss_sp = tls->signal_stack;
5060         sa.ss_size = SIGNAL_STACK_SIZE;
5061         sa.ss_flags = SS_DISABLE;
5062         sigaltstack  (&sa, NULL);
5063
5064         if (tls->signal_stack)
5065                 g_free (tls->signal_stack);
5066 #endif
5067 }
5068
5069 void
5070 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
5071 {
5072         int out_reg = param_regs [0];
5073
5074         /* FIXME: RDI and RSI might get clobbered */
5075
5076         if (vt_reg != -1) {
5077                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
5078                 MonoInst *vtarg;
5079
5080                 if (cinfo->ret.storage == ArgValuetypeInReg) {
5081                         /*
5082                          * The valuetype is in RAX:RDX after the call and needs to be copied
5083                          * to the stack. Push its address here, so the code emitted for the
5084                          * call instruction can access it.
5085                          */
5086                         MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
5087                         vtarg->sreg1 = vt_reg;
5088                         mono_bblock_add_inst (cfg->cbb, vtarg);
5089
5090                         /* Align stack */
5091                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
5092                 }
5093                 else {
5094                         MONO_INST_NEW (cfg, vtarg, OP_SETREG);
5095                         vtarg->sreg1 = vt_reg;
5096                         vtarg->dreg = out_reg;
5097                         out_reg = param_regs [1];
5098                         mono_bblock_add_inst (cfg->cbb, vtarg);
5099                 }
5100
5101                 g_free (cinfo);
5102         }
5103
5104         /* add the this argument */
5105         if (this_reg != -1) {
5106                 MonoInst *this;
5107                 MONO_INST_NEW (cfg, this, OP_SETREG);
5108                 this->type = this_type;
5109                 this->sreg1 = this_reg;
5110                 this->dreg = out_reg;
5111                 mono_bblock_add_inst (cfg->cbb, this);
5112         }
5113 }
5114
5115 gint
5116 mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5117 {
5118         if (cmethod->klass == mono_defaults.math_class) {
5119                 if (strcmp (cmethod->name, "Sin") == 0)
5120                         return OP_SIN;
5121                 else if (strcmp (cmethod->name, "Cos") == 0)
5122                         return OP_COS;
5123                 else if (strcmp (cmethod->name, "Tan") == 0)
5124                         return OP_TAN;
5125                 else if (strcmp (cmethod->name, "Atan") == 0)
5126                         return OP_ATAN;
5127                 else if (strcmp (cmethod->name, "Sqrt") == 0)
5128                         return OP_SQRT;
5129                 else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8)
5130                         return OP_ABS;
5131 #if 0
5132                 /* OP_FREM is not IEEE compatible */
5133                 else if (strcmp (cmethod->name, "IEEERemainder") == 0)
5134                         return OP_FREM;
5135 #endif
5136                 else
5137                         return -1;
5138         } else {
5139                 return -1;
5140         }
5141         return -1;
5142 }
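/*
 * E.g. a call to Math.Sqrt (x) is replaced by a single OP_SQRT node, which
 * the instruction selection rules later lower to the corresponding hardware
 * instruction, avoiding a call into the runtime.
 */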
5143
5144
5145 gboolean
5146 mono_arch_print_tree (MonoInst *tree, int arity)
5147 {
5148         return 0;
5149 }
5150
5151 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5152 {
5153         MonoInst* ins;
5154         
5155         if (appdomain_tls_offset == -1)
5156                 return NULL;
5157         
5158         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
5159         ins->inst_offset = appdomain_tls_offset;
5160         return ins;
5161 }
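/*
 * Illustrative only: OP_X86_TLS_GET with inst_offset set is expected to
 * expand to a single FS segment relative load (cf. the patterns matched in
 * read_tls_offset_from_method above), e.g.
 *
 *   64 48 8b 04 25 xx xx xx xx  mov %fs:<inst_offset>, %reg
 *
 * so an inlined mono_domain_get () costs one instruction.
 */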
5162
5163 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5164 {
5165         MonoInst* ins;
5166         
5167         if (thread_tls_offset == -1)
5168                 return NULL;
5169         
5170         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
5171         ins->inst_offset = thread_tls_offset;
5172         return ins;
5173 }