/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>

#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/utils/mono-math.h>

#include "trace.h"
#include "mini-amd64.h"
#include "inssel.h"
#include "cpu-amd64.h"

static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)
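
/*
 * Illustrative values: ALIGN_TO rounds up to a power-of-two boundary, and
 * IS_IMM32 tests whether a value is representable as a zero-extended 32 bit
 * immediate:
 *
 *   ALIGN_TO (13, 8)        -> 16
 *   ALIGN_TO (16, 8)        -> 16
 *   IS_IMM32 (0xffffffffL)  -> TRUE
 *   IS_IMM32 (0x100000000L) -> FALSE
 */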

#ifdef PLATFORM_WIN32
/* Under Windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

#define SIGNAL_STACK_SIZE (64 * 1024)

#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * FIXME:
 * - Use xmm registers instead of the x87 stack
 * - Allocate arguments to global registers
 * - Implement emulated opcodes
 * - (all archs) do not store trampoline addresses in method->info since they
 *   are domain specific.
 */

#define NOT_IMPLEMENTED g_assert_not_reached ()

const char*
mono_arch_regname (int reg) {
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static inline void
amd64_patch (unsigned char* code, gpointer target)
{
        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        if (code [0] == 0xbb) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else
                x86_patch (code, (unsigned char*)target);
}
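
/*
 * For illustration, the two instruction shapes patched above (assuming the
 * standard encoding emitted by amd64_set_reg_template () for %r11, and a
 * plain rel32 call/branch for everything else):
 *
 *   49 BB <imm64>   mov $target, %r11   -> imm64 rewritten in place
 *   E8 <rel32>      call target         -> rel32 fixed up by x86_patch ()
 *
 * The REX prefix (0x40-0x4f) is skipped first, which is why the 0xbb opcode
 * byte of the mov template is expected at code [0].
 */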

typedef enum {
        ArgInIReg,
        ArgInFloatSSEReg,
        ArgInDoubleSSEReg,
        ArgOnStack,
        ArgValuetypeInReg,
        ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
        gint16 offset;
        gint8  reg;
        ArgStorage storage;

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];
        gint8 pair_regs [2];
} ArgInfo;

typedef struct {
        int nargs;
        guint32 stack_usage;
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
        ArgInfo ret;
        ArgInfo sig_cookie;
        ArgInfo args [1];
} CallInfo;

#define DEBUG(a) if (cfg->verbose_level > 1) a

#define NEW_ICONST(cfg,dest,val) do {   \
                (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
                (dest)->opcode = OP_ICONST;     \
                (dest)->inst_c0 = (val);        \
                (dest)->type = STACK_I4;        \
        } while (0)

#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}
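
/*
 * Example (illustrative): for a call foo (a, b, c, d, e, f, g) with seven
 * integer arguments, a-f are assigned to %rdi, %rsi, %rdx, %rcx, %r8 and
 * %r9 in that order, and g is passed on the stack at offset 0 (assuming no
 * earlier stack arguments).
 */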

#define FLOAT_PARAM_REGS 8

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                (*stack_size) += sizeof (gpointer);
        }
        else {
                /* A double register */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}
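
/*
 * Floating point arguments follow the same pattern: the first eight go to
 * %xmm0-%xmm7 (here ainfo->reg holds the xmm index, not an AMD64_Reg_No),
 * and the rest spill to the stack.
 */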

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2;

        switch (type->type) {
        case MONO_TYPE_BOOLEAN:
        case MONO_TYPE_CHAR:
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
                class2 = ARG_CLASS_SSE;
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_VALUETYPE:
                if (type->data.klass->enumtype)
                        class2 = ARG_CLASS_INTEGER;
                else {
                        MonoMarshalType *info = mono_marshal_load_type_info (type->data.klass);
                        int i;

                        for (i = 0; i < info->num_fields; ++i) {
                                class2 = class1;
                                class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                        }
                }
                break;
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}
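
/*
 * The merge implements the lattice from the ABI document: NO_CLASS is the
 * identity, MEMORY absorbs everything, and INTEGER wins over SSE. So for a
 * struct { int a; float b; } the single eightbyte merges INTEGER with SSE
 * and classifies as INTEGER, i.e. it travels in a general purpose register.
 */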

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, quad, nquads, i;
        ArgumentClass args [2];
        MonoMarshalType *info;

        /* args [1] must stay initialized when nquads == 1 */
        args [0] = args [1] = ARG_CLASS_NO_CLASS;

        if (sig->pinvoke)
                size = mono_type_native_stack_size (&type->data.klass->byval_arg, NULL);
        else
                size = mono_type_stack_size (&type->data.klass->byval_arg, NULL);

        if (!sig->pinvoke || (size == 0) || (size > 16)) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        /* FIXME: Handle structs smaller than 8 bytes */
        //if ((size % 8) != 0)
        //      NOT_IMPLEMENTED;

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        /*
         * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
         * The X87 and SSEUP stuff is left out since there are no such types in
         * the CLR.
         */
        info = mono_marshal_load_type_info (type->data.klass);
        g_assert (info);
        if (info->native_size > 16) {
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (info->native_size, 8);
                ainfo->storage = ArgOnStack;

                return;
        }

        for (quad = 0; quad < nquads; ++quad) {
                int size, align;
                ArgumentClass class1;

                class1 = ARG_CLASS_NO_CLASS;
                for (i = 0; i < info->num_fields; ++i) {
                        size = mono_marshal_type_size (info->fields [i].field->type,
                                                       info->fields [i].mspec,
                                                       &align, TRUE, type->data.klass->unicode);
                        if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
                                /* Unaligned field */
                                NOT_IMPLEMENTED;
                        }

                        /* Skip fields in the other quad */
                        if ((quad == 0) && (info->fields [i].offset >= 8))
                                continue;
                        if ((quad == 1) && (info->fields [i].offset < 8))
                                continue;

                        class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
                }
                g_assert (class1 != ARG_CLASS_NO_CLASS);
                args [quad] = class1;
        }

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = ArgOnStack;
                }
        }
}
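
/*
 * Worked example (under the classification above, for a pinvoke signature):
 * for
 *
 *   struct { double x; gint64 y; }
 *
 * nquads is 2, quad 0 classifies as SSE and quad 1 as INTEGER, so the value
 * is passed with pair_storage = { ArgInDoubleSSEReg, ArgInIReg }, i.e. x in
 * the next free xmm register and y in the next free integer register.
 */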

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
        guint32 i, gr, fr, simpletype;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;

        cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        gr = 0;
        fr = 0;

        /* return value */
        {
                ret_type = sig->ret;
enum_retvalue:
                simpletype = ret_type->type;
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_STRING:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                case MONO_TYPE_R4:
                        cinfo->ret.storage = ArgInFloatSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_R8:
                        cinfo->ret.storage = ArgInDoubleSSEReg;
                        cinfo->ret.reg = AMD64_XMM0;
                        break;
                case MONO_TYPE_VALUETYPE: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                        if (ret_type->data.klass->enumtype) {
                                ret_type = ret_type->data.klass->enum_basetype;
                                goto enum_retvalue;
                        }

                        add_valuetype (sig, &cinfo->ret, ret_type, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                        if (cinfo->ret.storage == ArgOnStack)
                                /* The caller passes the address where the value is stored */
                                add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                }
                case MONO_TYPE_TYPEDBYREF:
                        /* Same as a valuetype with size 24 */
                        add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                case MONO_TYPE_GENERICINST:
                        ret_type = ret_type->data.generic_inst->generic_type;
                        goto enum_retvalue;
                case MONO_TYPE_VOID:
                        break;
                default:
                        g_error ("Can't handle as return value 0x%x", sig->ret->type);
                }
        }

        /* this */
        if (sig->hasthis)
                add_general (&gr, &stack_size, cinfo->args + 0);

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = 0; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                if (sig->params [i]->byref) {
                        add_general (&gr, &stack_size, ainfo);
                        continue;
                }
                ptype = sig->params [i];
        handle_enum:
                simpletype = ptype->type;
                switch (simpletype) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_VALUETYPE:
                        if (ptype->data.klass->enumtype) {
                                ptype = ptype->data.klass->enum_basetype;
                                goto handle_enum;
                        }

                        add_valuetype (sig, ainfo, ptype, FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_TYPEDBYREF:
                        stack_size += sizeof (MonoTypedRef);
                        ainfo->storage = ArgOnStack;
                        break;
                case MONO_TYPE_GENERICINST:
                        ptype = ptype->data.generic_inst->generic_type;
                        goto handle_enum;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        if (stack_size & 0x8) {
                /* The AMD64 ABI requires each stack frame to be 16 byte aligned */
                cinfo->need_stack_align = TRUE;
                stack_size += 8;
        }

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}
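
/*
 * Example (illustrative): for a managed signature such as
 *
 *   int Foo (object this, int a, double b, long c)
 *
 * the assignments are: this -> %rdi, a -> %rsi, b -> %xmm0, c -> %rdx,
 * with the return value in %rax and stack_usage == 0.
 */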

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        /* FIXME: */
        return 0;
}
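/*
 * FIXME: this is a stub; until it performs a real CPUID instruction it
 * always returns 0, so the feature detection in mono_arch_cpu_optimizazions ()
 * below never enables CMOV/FCMOV.
 */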
static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
        return 0;
}

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));

        mono_amd64_exceptions_init ();
}
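
/*
 * Note: the fnstcw/fldcw pair above rewrites the x87 precision-control
 * field from the usual startup default of extended precision (64 bit
 * mantissa) to double precision (53 bit mantissa), matching the CLR
 * floating point model; the trailing fnstcw merely re-reads the control
 * word.
 */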

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
        int eax, ebx, ecx, edx;
        guint32 opts = 0;

        /* FIXME: AMD64 */

        *exclude_mask = 0;
        /* Feature Flags function, flags returned in EDX. */
        if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
                if (edx & (1 << 15)) {
                        opts |= MONO_OPT_CMOV;
                        if (edx & 1)
                                opts |= MONO_OPT_FCMOV;
                        else
                                *exclude_mask |= MONO_OPT_FCMOV;
                } else
                        *exclude_mask |= MONO_OPT_CMOV;
        }
        return opts;
}

static gboolean
is_regsize_var (MonoType *t) {
        if (t->byref)
                return TRUE;
        switch (t->type) {
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
                return TRUE;
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_STRING:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
                return TRUE;
        case MONO_TYPE_VALUETYPE:
                if (t->data.klass->enumtype)
                        return is_regsize_var (t->data.klass->enum_basetype);
                return FALSE;
        }
        return FALSE;
}

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                /* we don't allocate I1 to registers because there is no simple way to
                 * sign-extend 8 bit quantities in caller saved registers on x86 */
                if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) ||
                    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2) ||
                    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
        GList *regs = NULL;

        /* We use the callee saved registers for global allocation */
        regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
        regs = g_list_prepend (regs, (gpointer)AMD64_R12);
        regs = g_list_prepend (regs, (gpointer)AMD64_R13);
        regs = g_list_prepend (regs, (gpointer)AMD64_R14);
        regs = g_list_prepend (regs, (gpointer)AMD64_R15);

        return regs;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
        MonoInst *ins = cfg->varinfo [vmv->idx];

        if (cfg->method->save_lmf)
                /* The register is already saved */
                /* subtract 1 for the invisible store in the prolog */
                return (ins->opcode == OP_ARG) ? 0 : 1;
        else
                /* push+pop */
                return (ins->opcode == OP_ARG) ? 1 : 2;
}

void
mono_arch_allocate_vars (MonoCompile *m)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *inst;
        int i, offset, size, align, curinst;
        CallInfo *cinfo;

        header = ((MonoMethodNormal *)m->method)->header;

        sig = m->method->signature;

        cinfo = get_call_info (sig, FALSE);

        /*
         * We use the ABI calling conventions for managed code as well.
         * Exception: valuetypes are never passed or returned in registers.
         */

        /* Locals are allocated backwards from %fp */
        m->frame_reg = AMD64_RBP;
        offset = 0;

        /* Reserve space for callee saved registers */
        for (i = 0; i < AMD64_NREG; ++i)
                if (AMD64_IS_CALLEE_SAVED_REG (i) && (m->used_int_regs & (1 << i))) {
                        offset += sizeof (gpointer);
                }

        if (m->method->save_lmf) {
                /* Reserve stack space for saving LMF + argument regs */
                offset += sizeof (MonoLMF);
                if (lmf_tls_offset == -1)
                        /* Need to save argument regs too */
                        offset += (AMD64_NREG * 8) + (8 * 8);
                m->arch.lmf_offset = offset;
        }

        if (sig->ret->type != MONO_TYPE_VOID) {
                switch (cinfo->ret.storage) {
                case ArgInIReg:
                case ArgInFloatSSEReg:
                case ArgInDoubleSSEReg:
                        if (((sig->ret->type == MONO_TYPE_VALUETYPE) && !sig->ret->data.klass->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
                                /* The register is volatile */
                                m->ret->opcode = OP_REGOFFSET;
                                m->ret->inst_basereg = AMD64_RBP;
                                offset += 8;
                                m->ret->inst_offset = - offset;
                        }
                        else {
                                m->ret->opcode = OP_REGVAR;
                                m->ret->inst_c0 = cinfo->ret.reg;
                        }
                        break;
                default:
                        g_assert_not_reached ();
                }
                m->ret->dreg = m->ret->inst_c0;
        }

        curinst = m->locals_start;
        for (i = curinst; i < m->num_varinfo; ++i) {
                inst = m->varinfo [i];

                if (inst->opcode == OP_REGVAR) {
                        //g_print ("allocating local %d to %s\n", i, mono_arch_regname (inst->dreg));
                        continue;
                }

                /* inst->unused indicates native sized value types, this is used by the
                 * pinvoke wrappers when they call functions returning structures */
                if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
                        size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
                else
                        size = mono_type_stack_size (inst->inst_vtype, &align);

                /*
                 * variables are accessed as negative offsets from %fp, so increase
                 * the offset before assigning it to a variable
                 */
                offset += size;

                offset += align - 1;
                offset &= ~(align - 1);
                inst->opcode = OP_REGOFFSET;
                inst->inst_basereg = AMD64_RBP;
                inst->inst_offset = - offset;

                //g_print ("allocating local %d to [%s - %d]\n", i, mono_arch_regname (inst->inst_basereg), - inst->inst_offset);
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                m->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                inst = m->varinfo [i];
                if (inst->opcode != OP_REGVAR) {
                        ArgInfo *ainfo = &cinfo->args [i];
                        gboolean inreg = TRUE;
                        MonoType *arg_type;

                        if (sig->hasthis && (i == 0))
                                arg_type = &mono_defaults.object_class->byval_arg;
                        else
                                arg_type = sig->params [i - sig->hasthis];

                        /* FIXME: Allocate volatile arguments to registers */
                        if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
                                inreg = FALSE;

                        /*
                         * Under AMD64, all registers used to pass arguments to functions
                         * are volatile across calls.
                         * FIXME: Optimize this.
                         */
                        if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg))
                                inreg = FALSE;

                        inst->opcode = OP_REGOFFSET;

                        switch (ainfo->storage) {
                        case ArgInIReg:
                        case ArgInFloatSSEReg:
                        case ArgInDoubleSSEReg:
                                inst->opcode = OP_REGVAR;
                                inst->dreg = ainfo->reg;
                                break;
                        case ArgOnStack:
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = AMD64_RBP;
                                inst->inst_offset = ainfo->offset + ARGS_OFFSET;
                                break;
                        default:
                                NOT_IMPLEMENTED;
                        }

                        if (!inreg && (ainfo->storage != ArgOnStack)) {
                                inst->opcode = OP_REGOFFSET;
                                inst->inst_basereg = AMD64_RBP;
                                /* These arguments are saved to the stack in the prolog */
                                offset += 8;
                                inst->inst_offset = - offset;
                        }
                }
        }

        m->stack_offset = offset;

        g_free (cinfo);
}

static void
add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, MonoInst *arg, ArgStorage storage, int reg, MonoInst *tree)
{
        switch (storage) {
        case ArgInIReg:
                arg->opcode = OP_OUTARG_REG;
                arg->inst_left = tree;
                arg->inst_right = (MonoInst*)call;
                arg->unused = reg;
                call->used_iregs |= 1 << reg;
                break;
        case ArgInFloatSSEReg:
                /* FIXME: These are volatile as well */
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R4;
                arg->inst_left = tree;
                arg->unused = reg;
                break;
        case ArgInDoubleSSEReg:
                arg->opcode = OP_AMD64_OUTARG_XMMREG_R8;
                arg->inst_left = tree;
                arg->unused = reg;
                break;
        default:
                g_assert_not_reached ();
        }
}

/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info
 */

static int
arg_storage_to_ldind (ArgStorage storage)
{
        switch (storage) {
        case ArgInIReg:
                return CEE_LDIND_I;
        case ArgInDoubleSSEReg:
                return CEE_LDIND_R8;
        case ArgInFloatSSEReg:
                return CEE_LDIND_R4;
        default:
                g_assert_not_reached ();
        }

        return -1;
}

/*
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
        int i, n, stack_size;
        CallInfo *cinfo;
        ArgInfo *ainfo;

        stack_size = 0;

        sig = call->signature;
        n = sig->param_count + sig->hasthis;

        cinfo = get_call_info (sig, sig->pinvoke);

        for (i = 0; i < n; ++i) {
                ainfo = cinfo->args + i;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        MonoMethodSignature *tmp_sig;

                        /* Emit the signature cookie just before the implicit arguments */
                        MonoInst *sig_arg;
                        /* FIXME: Add support for signature tokens to AOT */
                        cfg->disable_aot = TRUE;

                        g_assert (cinfo->sig_cookie.storage == ArgOnStack);

                        /*
                         * mono_ArgIterator_Setup assumes the signature cookie is
                         * passed first and all the arguments which were before it are
                         * passed on the stack after the signature. So compensate by
                         * passing a different signature.
                         */
                        tmp_sig = mono_metadata_signature_dup (call->signature);
                        tmp_sig->param_count -= call->signature->sentinelpos;
                        tmp_sig->sentinelpos = 0;
                        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

                        MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
                        sig_arg->inst_p0 = tmp_sig;

                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        arg->inst_left = sig_arg;
                        arg->type = STACK_PTR;

                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;
                }

                if (is_virtual && i == 0) {
                        /* the argument will be attached to the call instruction */
                        in = call->args [i];
                } else {
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        in = call->args [i];
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
                        arg->type = in->type;
                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;

                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT (sig->params [i - sig->hasthis]))) {
                                gint align;
                                guint32 size;

                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
                                        size = sizeof (MonoTypedRef);
                                        align = sizeof (gpointer);
                                }
                                else if (sig->pinvoke)
                                        size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
                                else
                                        size = mono_type_stack_size (&in->klass->byval_arg, &align);
                                if (ainfo->storage == ArgValuetypeInReg) {
                                        if (ainfo->pair_storage [1] == ArgNone) {
                                                MonoInst *load;

                                                /* Simpler case */

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = in;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);
                                        }
                                        else {
                                                /* Trees can't be shared so make a copy */
                                                MonoInst *vtaddr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
                                                MonoInst *load, *load2, *offset_ins;

                                                /* Reg1 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

                                                NEW_ICONST (cfg, offset_ins, 0);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [0]));
                                                load->inst_left = load2;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [0], ainfo->pair_regs [0], load);

                                                /* Reg2 */
                                                MONO_INST_NEW (cfg, load, CEE_LDIND_I);
                                                load->inst_i0 = (cfg)->varinfo [vtaddr->inst_c0];

                                                NEW_ICONST (cfg, offset_ins, 8);
                                                MONO_INST_NEW (cfg, load2, CEE_ADD);
                                                load2->inst_left = load;
                                                load2->inst_right = offset_ins;

                                                MONO_INST_NEW (cfg, load, arg_storage_to_ldind (ainfo->pair_storage [1]));
                                                load->inst_left = load2;

                                                MONO_INST_NEW (cfg, arg, OP_OUTARG);
                                                arg->cil_code = in->cil_code;
                                                arg->type = in->type;
                                                /* prepend, so they get reversed */
                                                arg->next = call->out_args;
                                                call->out_args = arg;

                                                add_outarg_reg (cfg, call, arg, ainfo->pair_storage [1], ainfo->pair_regs [1], load);

                                                /* Prepend a copy inst */
                                                MONO_INST_NEW (cfg, arg, CEE_STIND_I);
                                                arg->cil_code = in->cil_code;
                                                arg->inst_left = vtaddr;
                                                arg->inst_right = in;
                                                arg->type = in->type;

                                                /* prepend, so they get reversed */
                                                arg->next = call->out_args;
                                                call->out_args = arg;
                                        }
                                }
                                else {
                                        arg->opcode = OP_OUTARG_VT;
                                        arg->klass = in->klass;
                                        arg->unused = sig->pinvoke;
                                        arg->inst_imm = size;
                                }
                        }
                        else {
                                switch (ainfo->storage) {
                                case ArgInIReg:
                                        add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
                                        break;
                                case ArgInFloatSSEReg:
                                case ArgInDoubleSSEReg:
                                        add_outarg_reg (cfg, call, arg, ainfo->storage, ainfo->reg, in);
                                        break;
                                case ArgOnStack:
                                        arg->opcode = OP_OUTARG;
                                        if (!sig->params [i - sig->hasthis]->byref) {
                                                if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4)
                                                        arg->opcode = OP_OUTARG_R4;
                                                else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8)
                                                        arg->opcode = OP_OUTARG_R8;
                                        }
                                        break;
                                default:
                                        g_assert_not_reached ();
                                }
                        }
                }
        }

        if (cinfo->need_stack_align) {
                MONO_INST_NEW (cfg, arg, OP_AMD64_OUTARG_ALIGN_STACK);
                /* prepend, so they get reversed */
                arg->next = call->out_args;
                call->out_args = arg;
        }

        call->stack_usage = cinfo->stack_usage;
        cfg->param_area = MAX (cfg->param_area, call->stack_usage);
        cfg->flags |= MONO_CFG_HAS_CALLS;

        g_free (cinfo);

        return call;
}

#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}

/* Emit an exception if the condition fails */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0);

#define EMIT_FPCOMPARE(code) do { \
        amd64_fcompp (code); \
        amd64_fnstsw (code); \
} while (0);

/*
 * Emitting a call and patching it later is expensive on amd64, so try to
 * determine the patch target immediately, and emit more efficient code if
 * possible.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
        /* FIXME: */
        mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
        amd64_set_reg_template (code, GP_SCRATCH_REG);
        amd64_call_reg (code, GP_SCRATCH_REG);

        return code;
}
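
/*
 * For illustration, the template/call pair above expands to roughly this
 * byte sequence (with %r11 as GP_SCRATCH_REG):
 *
 *   49 BB <imm64>   mov $<placeholder>, %r11   ; imm64 patched later
 *   41 FF D3        call *%r11
 *
 * which is the shape amd64_patch () recognizes via the 0xbb opcode byte.
 */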
1212
1213 #define EMIT_CALL() do { \
1214     amd64_set_reg_template (code, GP_SCRATCH_REG); \
1215     amd64_call_reg (code, GP_SCRATCH_REG); \
1216 } while (0);
1217
1218 /* FIXME: Add more instructions */
1219 #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI8_MEMBASE_REG))
1220
1221 static void
1222 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1223 {
1224         MonoInst *ins, *last_ins = NULL;
1225         ins = bb->code;
1226
1227         while (ins) {
1228
1229                 switch (ins->opcode) {
1230                 case OP_ICONST:
1231                 case OP_I8CONST:
1232                         /* reg = 0 -> XOR (reg, reg) */
1233                         /* XOR sets cflags on x86, so we cant do it always */
1234                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1235                                 ins->opcode = CEE_XOR;
1236                                 ins->sreg1 = ins->dreg;
1237                                 ins->sreg2 = ins->dreg;
1238                         }
1239                         break;
1240                 case OP_MUL_IMM: 
1241                         /* remove unnecessary multiplication with 1 */
1242                         if (ins->inst_imm == 1) {
1243                                 if (ins->dreg != ins->sreg1) {
1244                                         ins->opcode = OP_MOVE;
1245                                 } else {
1246                                         last_ins->next = ins->next;
1247                                         ins = ins->next;
1248                                         continue;
1249                                 }
1250                         }
1251                         break;
1252                 case OP_COMPARE_IMM:
1253                         /* OP_COMPARE_IMM (reg, 0) 
1254                          * --> 
1255                          * OP_AMD64_TEST_NULL (reg) 
1256                          */
1257                         if (!ins->inst_imm)
1258                                 ins->opcode = OP_AMD64_TEST_NULL;
1259                         break;
1260                 case OP_ICOMPARE_IMM:
1261                         if (!ins->inst_imm)
1262                                 ins->opcode = OP_X86_TEST_NULL;
1263                         break;
1264                 case OP_X86_COMPARE_MEMBASE_IMM:
1265                         /* 
1266                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1267                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1268                          * -->
1269                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1270                          * OP_COMPARE_IMM reg, imm
1271                          *
1272                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1273                          */
1274                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1275                             ins->inst_basereg == last_ins->inst_destbasereg &&
1276                             ins->inst_offset == last_ins->inst_offset) {
1277                                         ins->opcode = OP_COMPARE_IMM;
1278                                         ins->sreg1 = last_ins->sreg1;
1279
1280                                         /* check if we can remove cmp reg,0 with test null */
1281                                         if (!ins->inst_imm)
1282                                                 ins->opcode = OP_X86_TEST_NULL;
1283                                 }
1284
1285                         break;
1286                 case OP_LOAD_MEMBASE:
1287                 case OP_LOADI4_MEMBASE:
1288                         /* 
1289                          * Note: if reg1 = reg2 the load op is removed
1290                          *
1291                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1292                          * OP_LOAD_MEMBASE offset(basereg), reg2
1293                          * -->
1294                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1295                          * OP_MOVE reg1, reg2
1296                          */
1297                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1298                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1299                             ins->inst_basereg == last_ins->inst_destbasereg &&
1300                             ins->inst_offset == last_ins->inst_offset) {
1301                                 if (ins->dreg == last_ins->sreg1) {
1302                                         last_ins->next = ins->next;                             
1303                                         ins = ins->next;                                
1304                                         continue;
1305                                 } else {
1306                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1307                                         ins->opcode = OP_MOVE;
1308                                         ins->sreg1 = last_ins->sreg1;
1309                                 }
1310
1311                         /* 
1312                          * Note: reg1 must be different from the basereg in the second load
1313                          * Note: if reg1 = reg2 is equal then second load is removed
1314                          *
1315                          * OP_LOAD_MEMBASE offset(basereg), reg1
1316                          * OP_LOAD_MEMBASE offset(basereg), reg2
1317                          * -->
1318                          * OP_LOAD_MEMBASE offset(basereg), reg1
1319                          * OP_MOVE reg1, reg2
1320                          */
1321                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1322                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1323                               ins->inst_basereg != last_ins->dreg &&
1324                               ins->inst_basereg == last_ins->inst_basereg &&
1325                               ins->inst_offset == last_ins->inst_offset) {
1326
1327                                 if (ins->dreg == last_ins->dreg) {
1328                                         last_ins->next = ins->next;                             
1329                                         ins = ins->next;                                
1330                                         continue;
1331                                 } else {
1332                                         ins->opcode = OP_MOVE;
1333                                         ins->sreg1 = last_ins->dreg;
1334                                 }
1335
1336                                 //g_assert_not_reached ();
1337
1338 #if 0
1339                         /* 
1340                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1341                          * OP_LOAD_MEMBASE offset(basereg), reg
1342                          * -->
1343                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1344                          * OP_ICONST reg, imm
1345                          */
1346                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1347                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1348                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1349                                    ins->inst_offset == last_ins->inst_offset) {
1350                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1351                                 ins->opcode = OP_ICONST;
1352                                 ins->inst_c0 = last_ins->inst_imm;
1353                                 g_assert_not_reached (); // check this rule
1354 #endif
1355                         }
1356                         break;
1357                 case OP_LOADU1_MEMBASE:
1358                 case OP_LOADI1_MEMBASE:
1359                         /* 
1360                          * Note: if reg1 = reg2, the load op is removed
1361                          *
1362                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1363                          * OP_LOAD_MEMBASE offset(basereg), reg2
1364                          * -->
1365                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1366                          * OP_MOVE reg1, reg2
1367                          */
1368                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1369                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1370                                         ins->inst_offset == last_ins->inst_offset) {
1371                                 if (ins->dreg == last_ins->sreg1) {
1372                                         last_ins->next = ins->next;                             
1373                                         ins = ins->next;                                
1374                                         continue;
1375                                 } else {
1376                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1377                                         ins->opcode = OP_MOVE;
1378                                         ins->sreg1 = last_ins->sreg1;
1379                                 }
1380                         }
1381                         break;
1382                 case OP_LOADU2_MEMBASE:
1383                 case OP_LOADI2_MEMBASE:
1384                         /* 
1385                          * Note: if reg1 = reg2, the load op is removed
1386                          *
1387                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1388                          * OP_LOAD_MEMBASE offset(basereg), reg2
1389                          * -->
1390                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1391                          * OP_MOVE reg1, reg2
1392                          */
1393                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1394                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1395                                         ins->inst_offset == last_ins->inst_offset) {
1396                                 if (ins->dreg == last_ins->sreg1) {
1397                                         last_ins->next = ins->next;                             
1398                                         ins = ins->next;                                
1399                                         continue;
1400                                 } else {
1401                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1402                                         ins->opcode = OP_MOVE;
1403                                         ins->sreg1 = last_ins->sreg1;
1404                                 }
1405                         }
1406                         break;
1407                 case CEE_CONV_I4:
1408                 case CEE_CONV_U4:
1409                 case OP_MOVE:
1410                         /*
1411                          * Removes:
1412                          *
1413                          * OP_MOVE reg, reg 
1414                          */
1415                         if (ins->dreg == ins->sreg1) {
1416                                 if (last_ins)
1417                                         last_ins->next = ins->next;                             
1418                                 ins = ins->next;
1419                                 continue;
1420                         }
1421                         /* 
1422                          * Removes:
1423                          *
1424                          * OP_MOVE sreg, dreg 
1425                          * OP_MOVE dreg, sreg
1426                          */
1427                         if (last_ins && last_ins->opcode == OP_MOVE &&
1428                             ins->sreg1 == last_ins->dreg &&
1429                             ins->dreg == last_ins->sreg1) {
1430                                 last_ins->next = ins->next;                             
1431                                 ins = ins->next;                                
1432                                 continue;
1433                         }
1434                         break;
1435                 }
1436                 last_ins = ins;
1437                 ins = ins->next;
1438         }
1439         bb->last_ins = last_ins;
1440 }
1441
1442 static const int 
1443 branch_cc_table [] = {
1444         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1445         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1446         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1447 };
1448
1449 static int
1450 opcode_to_x86_cond (int opcode)
1451 {
1452         switch (opcode) {
1453         case OP_IBEQ:
1454                 return X86_CC_EQ;
1455         case OP_IBNE_UN:
1456                 return X86_CC_NE;
1457         case OP_IBLT:
1458                 return X86_CC_LT;
1459         case OP_IBLT_UN:
1460                 return X86_CC_LT;
1461         case OP_IBGT:
1462                 return X86_CC_GT;
1463         case OP_IBGT_UN:
1464                 return X86_CC_GT;
1465         case OP_IBGE:
1466                 return X86_CC_GE;
1467         case OP_IBGE_UN:
1468                 return X86_CC_GE;
1469         case OP_IBLE:
1470                 return X86_CC_LE;
1471         case OP_IBLE_UN:
1472                 return X86_CC_LE;
1473         case OP_COND_EXC_IOV:
1474                 return X86_CC_O;
1475         case OP_COND_EXC_IC:
1476                 return X86_CC_C;
1477         default:
1478                 g_assert_not_reached ();
1479         }
1480
1481         return -1;
1482 }
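/*
 * Note (an assumption, not verified here): the unsigned variants above map
 * to the same condition codes as the signed ones; presumably the caller
 * selects signed vs. unsigned semantics separately when emitting the
 * actual jcc/setcc.
 */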
1483
1484 /*
1485  * returns the offset used by spillvar. It allocates a new
1486  * spill variable if necessary. 
1487  */
1488 static int
1489 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1490 {
1491         MonoSpillInfo **si, *info;
1492         int i = 0;
1493
1494         si = &cfg->spill_info; 
1495         
1496         while (i <= spillvar) {
1497
1498                 if (!*si) {
1499                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1500                         info->next = NULL;
1501                         cfg->stack_offset += sizeof (gpointer);
1502                         info->offset = - cfg->stack_offset;
1503                 }
1504
1505                 if (i == spillvar)
1506                         return (*si)->offset;
1507
1508                 i++;
1509                 si = &(*si)->next;
1510         }
1511
1512         g_assert_not_reached ();
1513         return 0;
1514 }
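/*
 * Illustrative sketch (assumptions: an empty spill list and
 * cfg->stack_offset == 0 on entry; sizeof (gpointer) == 8):
 *
 *   mono_spillvar_offset (cfg, 0);  // allocates slot 0, returns -8
 *   mono_spillvar_offset (cfg, 2);  // allocates slots 1-2, returns -24
 *   mono_spillvar_offset (cfg, 0);  // slot already exists, returns -8
 *
 * i.e. spill slots grow downwards from %rbp as they are first requested.
 */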
1515
1516 /*
1517  * returns the offset used by spillvar. It allocates a new
1518  * spill float variable if necessary. 
1519  * (same as mono_spillvar_offset but for floats)
1520  */
1521 static int
1522 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1523 {
1524         MonoSpillInfo **si, *info;
1525         int i = 0;
1526
1527         si = &cfg->spill_info_float; 
1528         
1529         while (i <= spillvar) {
1530
1531                 if (!*si) {
1532                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1533                         info->next = NULL;
1534                         cfg->stack_offset += sizeof (double);
1535                         info->offset = - cfg->stack_offset;
1536                 }
1537
1538                 if (i == spillvar)
1539                         return (*si)->offset;
1540
1541                 i++;
1542                 si = &(*si)->next;
1543         }
1544
1545         g_assert_not_reached ();
1546         return 0;
1547 }
1548
1549 /*
1550  * Creates a store for spilled floating point items
1551  */
1552 static MonoInst*
1553 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1554 {
1555         MonoInst *store;
1556         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1557         store->sreg1 = reg;
1558         store->inst_destbasereg = AMD64_RBP;
1559         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1560
1561         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08lx(%%sp)) (from %d)\n", spill, (long)store->inst_offset, reg));
1562         return store;
1563 }
1564
1565 /*
1566  * Creates a load for spilled floating point items 
1567  */
1568 static MonoInst*
1569 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1570 {
1571         MonoInst *load;
1572         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1573         load->dreg = reg;
1574         load->inst_basereg = AMD64_RBP;
1575         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1576
1577         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08lx(%%sp)) (from %d)\n", spill, (long)load->inst_offset, reg));
1578         return load;
1579 }
1580
1581 #define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && AMD64_IS_CALLEE_REG ((r)))
1582
1583 typedef struct {
1584         int born_in;
1585         int killed_in;
1586         int last_use;
1587         int prev_use;
1588         int flags;              /* used to track fp spill/load */
1589 } RegTrack;
1590
1591 static const char*const * ins_spec = amd64_desc;
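/*
 * Each ins_spec entry describes an opcode's operands; the characters at
 * MONO_INST_DEST, MONO_INST_SRC1, MONO_INST_SRC2 and MONO_INST_CLOB are
 * interpreted by the allocator below: 'f' = float register, 'l'/'L' =
 * long register pair, 'b' = base register only, 'a' = fixed RAX,
 * 'd' = fixed RDX; for the clob slot, 'c' = call, 's' = shift (RCX),
 * 'd' = division (RAX/RDX).
 */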
1592
1593 static void
1594 print_ins (int i, MonoInst *ins)
1595 {
1596         const char *spec = ins_spec [ins->opcode];
1597         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1598         if (!spec)
1599                 g_error ("Unknown opcode: %s\n", mono_inst_name (ins->opcode));
1600         if (spec [MONO_INST_DEST]) {
1601                 if (ins->dreg >= MONO_MAX_IREGS)
1602                         g_print (" R%d <-", ins->dreg);
1603                 else
1604                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1605         }
1606         if (spec [MONO_INST_SRC1]) {
1607                 if (ins->sreg1 >= MONO_MAX_IREGS)
1608                         g_print (" R%d", ins->sreg1);
1609                 else
1610                         g_print (" %s", mono_arch_regname (ins->sreg1));
1611         }
1612         if (spec [MONO_INST_SRC2]) {
1613                 if (ins->sreg2 >= MONO_MAX_IREGS)
1614                         g_print (" R%d", ins->sreg2);
1615                 else
1616                         g_print (" %s", mono_arch_regname (ins->sreg2));
1617         }
1618         if (spec [MONO_INST_CLOB])
1619                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1620         g_print ("\n");
1621 }
1622
1623 static void
1624 print_regtrack (RegTrack *t, int num)
1625 {
1626         int i;
1627         char buf [32];
1628         const char *r;
1629         
1630         for (i = 0; i < num; ++i) {
1631                 if (!t [i].born_in)
1632                         continue;
1633                 if (i >= MONO_MAX_IREGS) {
1634                         g_snprintf (buf, sizeof(buf), "R%d", i);
1635                         r = buf;
1636                 } else
1637                         r = mono_arch_regname (i);
1638                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1639         }
1640 }
1641
1642 typedef struct InstList InstList;
1643
1644 struct InstList {
1645         InstList *prev;
1646         InstList *next;
1647         MonoInst *data;
1648 };
1649
1650 static inline InstList*
1651 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1652 {
1653         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1654         item->data = data;
1655         item->prev = NULL;
1656         item->next = list;
1657         if (list)
1658                 list->prev = item;
1659         return item;
1660 }
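/*
 * Usage sketch, taken from the allocator below: the forward liveness pass
 * builds its work list with
 *
 *   reversed = inst_list_prepend (cfg->mempool, reversed, ins);
 *
 * so walking 'reversed' afterwards visits the basic block back to front.
 */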
1661
1662 /*
1663  * Force the spilling of the variable in the symbolic register 'reg'.
1664  */
1665 static int
1666 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1667 {
1668         MonoInst *load;
1669         int i, sel, spill;
1670         
1671         sel = cfg->rs->iassign [reg];
1672         /*i = cfg->rs->isymbolic [sel];
1673         g_assert (i == reg);*/
1674         i = reg;
1675         spill = ++cfg->spill_count;
1676         cfg->rs->iassign [i] = -spill - 1;
1677         mono_regstate_free_int (cfg->rs, sel);
1678         /* we need to create a spill var and insert a load to sel after the current instruction */
1679         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1680         load->dreg = sel;
1681         load->inst_basereg = AMD64_RBP;
1682         load->inst_offset = mono_spillvar_offset (cfg, spill);
1683         if (item->prev) {
1684                 while (ins->next != item->prev->data)
1685                         ins = ins->next;
1686         }
1687         load->next = ins->next;
1688         ins->next = load;
1689         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08lx(%%ebp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
1690         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1691         g_assert (i == sel);
1692
1693         return sel;
1694 }
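/*
 * Spill bookkeeping convention (as used above and decoded further down):
 * iassign [reg] == -1 means unassigned, while iassign [reg] < -1 encodes
 * "spilled to slot (-val - 1)"; hence the 'spill = -val - 1' pattern in
 * the assignment code below.
 */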
1695
1696 static int
1697 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1698 {
1699         MonoInst *load;
1700         int i, sel, spill;
1701
1702         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1703         /* exclude the registers in the current instruction */
1704         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1705                 if (ins->sreg1 >= MONO_MAX_IREGS)
1706                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1707                 else
1708                         regmask &= ~ (1 << ins->sreg1);
1709                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1710         }
1711         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1712                 if (ins->sreg2 >= MONO_MAX_IREGS)
1713                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1714                 else
1715                         regmask &= ~ (1 << ins->sreg2);
1716                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1717         }
1718         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1719                 regmask &= ~ (1 << ins->dreg);
1720                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1721         }
1722
1723         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1724         g_assert (regmask); /* need at least a register we can free */
1725         sel = -1;
1726         /* we should track prev_use and spill the register whose next use is farthest away */
1727         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1728                 if (regmask & (1 << i)) {
1729                         sel = i;
1730                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1731                         break;
1732                 }
1733         }
1734         i = cfg->rs->isymbolic [sel];
1735         spill = ++cfg->spill_count;
1736         cfg->rs->iassign [i] = -spill - 1;
1737         mono_regstate_free_int (cfg->rs, sel);
1738         /* we need to create a spill var and insert a load to sel after the current instruction */
1739         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1740         load->dreg = sel;
1741         load->inst_basereg = AMD64_RBP;
1742         load->inst_offset = mono_spillvar_offset (cfg, spill);
1743         if (item->prev) {
1744                 while (ins->next != item->prev->data)
1745                         ins = ins->next;
1746         }
1747         load->next = ins->next;
1748         ins->next = load;
1749         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08lx(%%ebp)) R%d (freed %s)\n", spill, (long)load->inst_offset, i, mono_arch_regname (sel)));
1750         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1751         g_assert (i == sel);
1752         
1753         return sel;
1754 }
1755
1756 static MonoInst*
1757 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1758 {
1759         MonoInst *copy;
1760         MONO_INST_NEW (cfg, copy, OP_MOVE);
1761         copy->dreg = dest;
1762         copy->sreg1 = src;
1763         if (ins) {
1764                 copy->next = ins->next;
1765                 ins->next = copy;
1766         }
1767         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1768         return copy;
1769 }
1770
1771 static MonoInst*
1772 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1773 {
1774         MonoInst *store;
1775         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1776         store->sreg1 = reg;
1777         store->inst_destbasereg = AMD64_RBP;
1778         store->inst_offset = mono_spillvar_offset (cfg, spill);
1779         if (ins) {
1780                 store->next = ins->next;
1781                 ins->next = store;
1782         }
1783         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08lx(%%ebp)) R%d (from %s)\n", spill, (long)store->inst_offset, prev_reg, mono_arch_regname (reg)));
1784         return store;
1785 }
1786
1787 static void
1788 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1789 {
1790         MonoInst *prev;
1791         if (item->next) {
1792                 prev = item->next->data;
1793
1794                 while (prev->next != ins)
1795                         prev = prev->next;
1796                 to_insert->next = ins;
1797                 prev->next = to_insert;
1798         } else {
1799                 to_insert->next = ins;
1800         }
1801         /* 
1802          * needed otherwise in the next instruction we can add an ins to the 
1803          * end and that would get past this instruction.
1804          */
1805         item->data = to_insert; 
1806 }
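/*
 * Usage sketch (mirroring the clob:s handling below): an argument that
 * already lives in a hard register is copied into place with
 *
 *   MonoInst *copy = create_copy_ins (cfg, AMD64_RCX, ins->sreg2, NULL);
 *   insert_before_ins (ins, tmp, copy);
 *
 * where 'tmp' is the InstList node holding 'ins' in the reversed list.
 */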
1807
1808
1809 #if 0
1810 static int
1811 alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
1812 {
1813         int val = cfg->rs->iassign [sym_reg];
1814         if (val < 0) {
1815                 int spill = 0;
1816                 if (val < -1) {
1817                         /* the register gets spilled after this inst */
1818                         spill = -val -1;
1819                 }
1820                 val = mono_regstate_alloc_int (cfg->rs, allow_mask);
1821                 if (val < 0)
1822                         val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
1823                 cfg->rs->iassign [sym_reg] = val;
1824                 /* add option to store before the instruction for src registers */
1825                 if (spill)
1826                         create_spilled_store (cfg, spill, val, sym_reg, ins);
1827         }
1828         cfg->rs->isymbolic [val] = sym_reg;
1829         return val;
1830 }
1831 #endif
1832
1833 /* flags used in reginfo->flags */
1834 enum {
1835         MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
1836         MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,
1837         MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,
1838         MONO_X86_REG_NOT_ECX                    = 1 << 3,
1839         MONO_X86_REG_EAX                                = 1 << 4,
1840         MONO_X86_REG_EDX                                = 1 << 5,
1841         MONO_X86_REG_ECX                                = 1 << 6
1842 };
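/*
 * Note: the MONO_X86_* names are inherited from the x86 allocator; in this
 * backend they stand for the corresponding AMD64 registers (RAX, RDX, RCX),
 * as the uses below show.
 */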
1843
1844 static int
1845 mono_amd64_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1846 {
1847         int val;
1848         int test_mask = dest_mask;
1849
1850         if (flags & MONO_X86_REG_EAX)
1851                 test_mask &= (1 << AMD64_RAX);
1852         else if (flags & MONO_X86_REG_EDX)
1853                 test_mask &= (1 << AMD64_RDX);
1854         else if (flags & MONO_X86_REG_ECX)
1855                 test_mask &= (1 << AMD64_RCX);
1856         else if (flags & MONO_X86_REG_NOT_ECX)
1857                 test_mask &= ~ (1 << AMD64_RCX);
1858
1859         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1860         if (val >= 0 && test_mask != dest_mask)
1861                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1862
1863         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1864                         DEBUG(g_print ("\tFailed to allocate from the flag-suggested mask (%u), retrying excluding ECX\n", test_mask));
1865                         val = mono_regstate_alloc_int (cfg->rs, (dest_mask & ~(1 << AMD64_RCX)));
1866         }
1867
1868         if (val < 0) {
1869                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1870                 if (val < 0)
1871                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1872         }
1873
1874         return val;
1875 }
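/*
 * Allocation falls back in stages: first the flag-restricted mask, then
 * (for MONO_X86_REG_NOT_ECX) the mask without RCX, then the unrestricted
 * dest_mask, and finally a forced spill. A typical call, from the clob:s
 * handling below:
 *
 *   new_dest = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask,
 *                                        ins->dreg, reginfo [ins->dreg].flags);
 */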
1876
1877
1878 /*#include "cprop.c"*/
1879
1880 /*
1881  * Local register allocation.
1882  * We first scan the list of instructions and we save the liveness info of
1883  * each register (when the register is first used, when its value is set, etc.).
1884  * We also reverse the list of instructions (in the InstList list) because assigning
1885  * registers backwards allows for more tricks to be used.
1886  */
1887 void
1888 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1889 {
1890         MonoInst *ins;
1891         MonoRegState *rs = cfg->rs;
1892         int i, val, fpcount;
1893         RegTrack *reginfo, *reginfof;
1894         RegTrack *reginfo1, *reginfo2, *reginfod;
1895         InstList *tmp, *reversed = NULL;
1896         const char *spec;
1897         guint32 src1_mask, src2_mask, dest_mask;
1898         GList *fspill_list = NULL;
1899         int fspill = 0;
1900
1901         if (!bb->code)
1902                 return;
1903         rs->next_vireg = bb->max_ireg;
1904         rs->next_vfreg = bb->max_freg;
1905         mono_regstate_assign (rs);
1906         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1907         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1908         rs->ifree_mask = AMD64_CALLEE_REGS;
1909
1910         ins = bb->code;
1911
1912         /*if (cfg->opt & MONO_OPT_COPYPROP)
1913                 local_copy_prop (cfg, ins);*/
1914
1915         i = 1;
1916         fpcount = 0;
1917         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1918         /* forward pass on the instructions to collect register liveness info */
1919         while (ins) {
1920                 spec = ins_spec [ins->opcode];
1921                 
1922                 DEBUG (print_ins (i, ins));
1923
1924                 if (spec [MONO_INST_SRC1]) {
1925                         if (spec [MONO_INST_SRC1] == 'f') {
1926                                 GList *spill;
1927                                 reginfo1 = reginfof;
1928
1929                                 spill = g_list_first (fspill_list);
1930                                 if (spill && fpcount < MONO_MAX_FREGS) {
1931                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1932                                         fspill_list = g_list_remove (fspill_list, spill->data);
1933                                 } else
1934                                         fpcount--;
1935                         }
1936                         else
1937                                 reginfo1 = reginfo;
1938                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1939                         reginfo1 [ins->sreg1].last_use = i;
1940                         if (spec [MONO_INST_SRC1] == 'L') {
1941                                 /* The virtual register is allocated sequentially */
1942                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1943                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1944                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1945                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1946
1947                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1948                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1949                         }
1950                 } else {
1951                         ins->sreg1 = -1;
1952                 }
1953                 if (spec [MONO_INST_SRC2]) {
1954                         if (spec [MONO_INST_SRC2] == 'f') {
1955                                 GList *spill;
1956                                 reginfo2 = reginfof;
1957                                 spill = g_list_first (fspill_list);
1958                                 if (spill) {
1959                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1960                                         fspill_list = g_list_remove (fspill_list, spill->data);
1961                                         if (fpcount >= MONO_MAX_FREGS) {
1962                                                 fspill++;
1963                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1964                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1965                                         }
1966                                 } else
1967                                         fpcount--;
1968                         }
1969                         else
1970                                 reginfo2 = reginfo;
1971                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1972                         reginfo2 [ins->sreg2].last_use = i;
1973                         if (spec [MONO_INST_SRC2] == 'L') {
1974                                 /* The virtual register is allocated sequentially */
1975                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1976                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1977                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1978                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1979                         }
1980                         if (spec [MONO_INST_CLOB] == 's') {
1981                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1982                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1983                         }
1984                 } else {
1985                         ins->sreg2 = -1;
1986                 }
1987                 if (spec [MONO_INST_DEST]) {
1988                         if (spec [MONO_INST_DEST] == 'f') {
1989                                 reginfod = reginfof;
1990                                 if (fpcount >= MONO_MAX_FREGS) {
1991                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1992                                         fspill++;
1993                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1994                                         fpcount--;
1995                                 }
1996                                 fpcount++;
1997                         }
1998                         else
1999                                 reginfod = reginfo;
2000                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
2001                                 reginfod [ins->dreg].killed_in = i;
2002                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
2003                         reginfod [ins->dreg].last_use = i;
2004                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
2005                                 reginfod [ins->dreg].born_in = i;
2006                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
2007                                 /* The virtual register is allocated sequentially */
2008                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
2009                                 reginfod [ins->dreg + 1].last_use = i;
2010                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
2011                                         reginfod [ins->dreg + 1].born_in = i;
2012
2013                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
2014                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
2015                         }
2016                 } else {
2017                         ins->dreg = -1;
2018                 }
2019
2020                 if (spec [MONO_INST_CLOB] == 'c') {
2021                         /* A call instruction implicitly uses all registers in call->out_reg_args */
2022
2023                         MonoCallInst *call = (MonoCallInst*)ins;
2024                         GSList *list;
2025
2026                         list = call->out_reg_args;
2027                         if (list) {
2028                                 while (list) {
2029                                         guint64 regpair;
2030                                         int reg, hreg;
2031
2032                                         regpair = (guint64) (list->data);
2033                                         hreg = regpair >> 32;
2034                                         reg = regpair & 0xffffffff;
2035
2036                                         reginfo [reg].prev_use = reginfo [reg].last_use;
2037                                         reginfo [reg].last_use = i;
2038
2039                                         list = g_slist_next (list);
2040                                 }
2041                         }
2042                 }
2043
2044                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
2045                 ++i;
2046                 ins = ins->next;
2047         }
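        /*
         * At this point 'reversed' holds the block's instructions back to
         * front and reginfo/reginfof hold the liveness ranges collected
         * above; the backward pass below uses them to assign hard registers.
         */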
2048
2049         // TODO: check whether anything is left on the fp stack, in verify mode
2050         fspill = 0;
2051
2052         DEBUG (print_regtrack (reginfo, rs->next_vireg));
2053         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2054         tmp = reversed;
2055         while (tmp) {
2056                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2057                 dest_mask = src1_mask = src2_mask = AMD64_CALLEE_REGS;
2058                 --i;
2059                 ins = tmp->data;
2060                 spec = ins_spec [ins->opcode];
2061                 prev_dreg = -1;
2062                 clob_dreg = -1;
2063                 DEBUG (g_print ("processing:"));
2064                 DEBUG (print_ins (i, ins));
2065                 if (spec [MONO_INST_CLOB] == 's') {
2066                         if (rs->ifree_mask & (1 << AMD64_RCX)) {
2067                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2068                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2069                                         /* Argument already in hard reg, need to copy */
2070                                         MonoInst *copy = create_copy_ins (cfg, AMD64_RCX, ins->sreg2, NULL);
2071                                         insert_before_ins (ins, tmp, copy);
2072                                 }
2073                                 rs->iassign [ins->sreg2] = AMD64_RCX;
2074                                 rs->isymbolic [AMD64_RCX] = ins->sreg2;
2075                                 ins->sreg2 = AMD64_RCX;
2076                                 rs->ifree_mask &= ~ (1 << AMD64_RCX);
2077                         } else {
2078                                 int need_ecx_spill = TRUE;
2079                                 /* 
2080                                  * we first check if src1/dreg is already assigned a register
2081                                  * and then we force a spill of the var assigned to ECX.
2082                                  */
2083                                 /* the destination register can't be ECX */
2084                                 dest_mask &= ~ (1 << AMD64_RCX);
2085                                 src1_mask &= ~ (1 << AMD64_RCX);
2086                                 val = rs->iassign [ins->dreg];
2087                                 /* 
2088                                  * the destination register is already assigned to ECX:
2089                                  * we need to allocate another register for it and then
2090                                  * copy from this to ECX.
2091                                  */
2092                                 if (val == AMD64_RCX && ins->dreg != ins->sreg2) {
2093                                         int new_dest;
2094                                         new_dest = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2095                                         g_assert (new_dest >= 0);
2096                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2097
2098                                         rs->isymbolic [new_dest] = ins->dreg;
2099                                         rs->iassign [ins->dreg] = new_dest;
2100                                         clob_dreg = ins->dreg;
2101                                         ins->dreg = new_dest;
2102                                         create_copy_ins (cfg, AMD64_RCX, new_dest, ins);
2103                                         need_ecx_spill = FALSE;
2104                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2105                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2106                                         rs->iassign [ins->dreg] = val;
2107                                         rs->isymbolic [val] = prev_dreg;
2108                                         ins->dreg = val;*/
2109                                 }
2110                                 val = rs->iassign [ins->sreg2];
2111                                 if (val >= 0 && val != AMD64_RCX) {
2112                                         MonoInst *move = create_copy_ins (cfg, AMD64_RCX, val, NULL);
2113                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2114                                         move->next = ins;
2115                                         g_assert_not_reached ();
2116                                         /* FIXME: where is move connected to the instruction list? */
2117                                         //tmp->prev->data->next = move;
2118                                 }
2119                                 else 
2120                                         if (val == AMD64_RCX) {
2121                                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2122                                                         /* sreg2 is already assigned to a hard reg, need to copy */
2123                                                         MonoInst *copy = create_copy_ins (cfg, AMD64_RCX, ins->sreg2, NULL);
2124                                                         insert_before_ins (ins, tmp, copy);
2125                                                 }
2126                                                 need_ecx_spill = FALSE;
2127                                         }
2128                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << AMD64_RCX))) {
2129                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RCX]));
2130                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RCX]);
2131                                         mono_regstate_free_int (rs, AMD64_RCX);
2132                                 }
2133                                 /* force-set sreg2 */
2134                                 rs->iassign [ins->sreg2] = AMD64_RCX;
2135                                 rs->isymbolic [AMD64_RCX] = ins->sreg2;
2136                                 ins->sreg2 = AMD64_RCX;
2137                                 rs->ifree_mask &= ~ (1 << AMD64_RCX);
2138                         }
2139                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
2140                         int dest_reg = AMD64_RAX;
2141                         int clob_reg = AMD64_RDX;
2142                         if (spec [MONO_INST_DEST] == 'd') {
2143                                 dest_reg = AMD64_RDX; /* remainder */
2144                                 clob_reg = AMD64_RAX;
2145                         }
2146                         val = rs->iassign [ins->dreg];
2147                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2148                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2149                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2150                                 mono_regstate_free_int (rs, dest_reg);
2151                         }
2152                         if (val < 0) {
2153                                 if (val < -1) {
2154                                         /* the register gets spilled after this inst */
2155                                         int spill = -val -1;
2156                                         dest_mask = 1 << clob_reg;
2157                                         prev_dreg = ins->dreg;
2158                                         val = mono_regstate_alloc_int (rs, dest_mask);
2159                                         if (val < 0)
2160                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2161                                         rs->iassign [ins->dreg] = val;
2162                                         if (spill)
2163                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2164                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2165                                         rs->isymbolic [val] = prev_dreg;
2166                                         ins->dreg = val;
2167                                         if (val != dest_reg) { /* force a copy */
2168                                                 create_copy_ins (cfg, val, dest_reg, ins);
2169                                         }
2170                                 } else {
2171                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2172                                         prev_dreg = ins->dreg;
2173                                         rs->iassign [ins->dreg] = dest_reg;
2174                                         rs->isymbolic [dest_reg] = ins->dreg;
2175                                         ins->dreg = dest_reg;
2176                                         rs->ifree_mask &= ~ (1 << dest_reg);
2177                                 }
2178                         } else {
2179                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2180                                 if (val != dest_reg) { /* force a copy */
2181                                         create_copy_ins (cfg, val, dest_reg, ins);
2182                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2183                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2184                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2185                                                 mono_regstate_free_int (rs, dest_reg);
2186                                         }
2187                                 }
2188                         }
2189                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2190                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2191                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2192                                 mono_regstate_free_int (rs, clob_reg);
2193                         }
2194                         src1_mask = 1 << AMD64_RAX;
2195                         src2_mask = 1 << AMD64_RCX;
2196                 }
2197                 if (spec [MONO_INST_DEST] == 'l') {
2198                         int hreg;
2199                         val = rs->iassign [ins->dreg];
2200                         /* check special case when dreg has been moved from ecx (clob shift) */
2201                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2202                                 hreg = clob_dreg + 1;
2203                         else
2204                                 hreg = ins->dreg + 1;
2205
2206                         /* base prev_dreg on fixed hreg, handle clob case */
2207                         val = hreg - 1;
2208
2209                         if (val != rs->isymbolic [AMD64_RAX] && !(rs->ifree_mask & (1 << AMD64_RAX))) {
2210                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
2211                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
2212                                 mono_regstate_free_int (rs, AMD64_RAX);
2213                         }
2214                         if (hreg != rs->isymbolic [AMD64_RDX] && !(rs->ifree_mask & (1 << AMD64_RDX))) {
2215                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [AMD64_RDX]));
2216                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RDX]);
2217                                 mono_regstate_free_int (rs, AMD64_RDX);
2218                         }
2219                 }
2220
2221                 /* Track dreg */
2222                 if (spec [MONO_INST_DEST] == 'f') {
2223                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2224                                 GList *spill_node;
2225                                 MonoInst *store;
2226                                 spill_node = g_list_first (fspill_list);
2227                                 g_assert (spill_node);
2228
2229                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2230                                 insert_before_ins (ins, tmp, store);
2231                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2232                                 fspill--;
2233                         }
2234                 } else if (spec [MONO_INST_DEST] == 'L') {
2235                         int hreg;
2236                         val = rs->iassign [ins->dreg];
2237                         /* check special case when dreg has been moved from ecx (clob shift) */
2238                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2239                                 hreg = clob_dreg + 1;
2240                         else
2241                                 hreg = ins->dreg + 1;
2242
2243                         /* base prev_dreg on fixed hreg, handle clob case */
2244                         prev_dreg = hreg - 1;
2245
2246                         if (val < 0) {
2247                                 int spill = 0;
2248                                 if (val < -1) {
2249                                         /* the register gets spilled after this inst */
2250                                         spill = -val -1;
2251                                 }
2252                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2253                                 rs->iassign [ins->dreg] = val;
2254                                 if (spill)
2255                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2256                         }
2257
2258                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2259  
2260                         rs->isymbolic [val] = hreg - 1;
2261                         ins->dreg = val;
2262                         
2263                         val = rs->iassign [hreg];
2264                         if (val < 0) {
2265                                 int spill = 0;
2266                                 if (val < -1) {
2267                                         /* the register gets spilled after this inst */
2268                                         spill = -val -1;
2269                                 }
2270                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2271                                 rs->iassign [hreg] = val;
2272                                 if (spill)
2273                                         create_spilled_store (cfg, spill, val, hreg, ins);
2274                         }
2275
2276                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2277                         rs->isymbolic [val] = hreg;
2278                         /* save the reg allocated for the high word into ins->unused */
2279                         ins->unused = val;
2280
2281                         /* check if we can free our long reg */
2282                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2283                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2284                                 mono_regstate_free_int (rs, val);
2285                         }
2286                 }
2287                 else if (ins->dreg >= MONO_MAX_IREGS) {
2288                         int hreg;
2289                         val = rs->iassign [ins->dreg];
2290                         if (spec [MONO_INST_DEST] == 'l') {
2291                                 /* check special case when dreg has been moved from ecx (clob shift) */
2292                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2293                                         hreg = clob_dreg + 1;
2294                                 else
2295                                         hreg = ins->dreg + 1;
2296
2297                                 /* base prev_dreg on fixed hreg, handle clob case */
2298                                 prev_dreg = hreg - 1;
2299                         } else
2300                                 prev_dreg = ins->dreg;
2301
2302                         if (val < 0) {
2303                                 int spill = 0;
2304                                 if (val < -1) {
2305                                         /* the register gets spilled after this inst */
2306                                         spill = -val -1;
2307                                 }
2308                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2309                                 rs->iassign [ins->dreg] = val;
2310                                 if (spill)
2311                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2312                         }
2313                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2314                         rs->isymbolic [val] = prev_dreg;
2315                         ins->dreg = val;
2316                         /* handle cases where lreg needs to be eax:edx */
2317                         if (spec [MONO_INST_DEST] == 'l') {
2318                                /* check special case when dreg has been moved from ecx (clob shift) */
2319                                 int hreg = prev_dreg + 1;
2320                                 val = rs->iassign [hreg];
2321                                 if (val < 0) {
2322                                         int spill = 0;
2323                                         if (val < -1) {
2324                                                 /* the register gets spilled after this inst */
2325                                                 spill = -val -1;
2326                                         }
2327                                         val = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2328                                         rs->iassign [hreg] = val;
2329                                         if (spill)
2330                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2331                                 }
2332                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2333                                 rs->isymbolic [val] = hreg;
2334                                 if (ins->dreg == AMD64_RAX) {
2335                                         if (val != AMD64_RDX)
2336                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2337                                 } else if (ins->dreg == AMD64_RDX) {
2338                                         if (val == AMD64_RAX) {
2339                                                 /* swap */
2340                                                 g_assert_not_reached ();
2341                                         } else {
2342                                                 /* two forced copies */
2343                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2344                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2345                                         }
2346                                 } else {
2347                                         if (val == AMD64_RDX) {
2348                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2349                                         } else {
2350                                                 /* two forced copies */
2351                                                 create_copy_ins (cfg, val, AMD64_RDX, ins);
2352                                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2353                                         }
2354                                 }
2355                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2356                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2357                                         mono_regstate_free_int (rs, val);
2358                                 }
2359                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != AMD64_RAX && spec [MONO_INST_CLOB] != 'd') {
2360                                 /* this instruction only outputs to EAX, need to copy */
2361                                 create_copy_ins (cfg, ins->dreg, AMD64_RAX, ins);
2362                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != AMD64_RDX && spec [MONO_INST_CLOB] != 'd') {
2363                                 create_copy_ins (cfg, ins->dreg, AMD64_RDX, ins);
2364                         }
2365                 }
2366                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2367                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2368                         mono_regstate_free_int (rs, ins->dreg);
2369                 }
2370                 /* put src1 in EAX if it needs to be */
2371                 if (spec [MONO_INST_SRC1] == 'a') {
2372                         if (!(rs->ifree_mask & (1 << AMD64_RAX))) {
2373                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [AMD64_RAX]));
2374                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [AMD64_RAX]);
2375                                 mono_regstate_free_int (rs, AMD64_RAX);
2376                         }
2377                         if (ins->sreg1 < MONO_MAX_IREGS) {
2378                                 /* The argument is already in a hard reg, need to copy */
2379                                 MonoInst *copy = create_copy_ins (cfg, AMD64_RAX, ins->sreg1, NULL);
2380                                 insert_before_ins (ins, tmp, copy);
2381                         }
2382                         /* force-set sreg1 */
2383                         rs->iassign [ins->sreg1] = AMD64_RAX;
2384                         rs->isymbolic [AMD64_RAX] = ins->sreg1;
2385                         ins->sreg1 = AMD64_RAX;
2386                         rs->ifree_mask &= ~ (1 << AMD64_RAX);
2387                 }
2388
2389                 /* Track sreg1 */
2390                 if (spec [MONO_INST_SRC1] == 'f') {
2391                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2392                                 MonoInst *load;
2393                                 MonoInst *store = NULL;
2394
2395                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2396                                         GList *spill_node;
2397                                         spill_node = g_list_first (fspill_list);
2398                                         g_assert (spill_node);
2399
2400                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2401                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2402                                 }
2403
2404                                 fspill++;
2405                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2406                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2407                                 insert_before_ins (ins, tmp, load);
2408                                 if (store) 
2409                                         insert_before_ins (load, tmp, store);
2410                         }
2411                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2412                         /* force source to be same as dest */
2413                         rs->iassign [ins->sreg1] = ins->dreg;
2414                         rs->iassign [ins->sreg1 + 1] = ins->unused;
2415
2416                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2417                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2418
2419                         ins->sreg1 = ins->dreg;
2420                         /* 
2421                          * No need for saving the reg, we know that src1 == dest in this case
2422                          * ins->inst_c0 = ins->unused;
2423                          */
2424
2425                         /* make sure that we remove them from the free mask */
2426                         rs->ifree_mask &= ~ (1 << ins->dreg);
2427                         rs->ifree_mask &= ~ (1 << ins->unused);
2428                 }
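                /*
                 * Illustrative mapping for the 'L' case above: the two halves
                 * of the long source pair are pinned to the destination pair,
                 * so no copies are needed:
                 *
                 *   rs->iassign [ins->sreg1]     == ins->dreg      low word
                 *   rs->iassign [ins->sreg1 + 1] == ins->unused    high word
                 */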
2429                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2430                         val = rs->iassign [ins->sreg1];
2431                         prev_sreg1 = ins->sreg1;
2432                         if (val < 0) {
2433                                 int spill = 0;
2434                                 if (val < -1) {
2435                                         /* the register gets spilled after this inst */
2436                                         spill = -val -1;
2437                                 }
2438                                 if (0 && (ins->opcode == OP_MOVE)) {
2439                                         /* 
2440                                          * small optimization: the dest register is already allocated
2441                                          * but the src one is not: we can simply assign the same register
2442                                          * here and peephole will get rid of the instruction later.
2443                                          * This optimization may interfere with the clobbering handling:
2444                                          * it removes a mov operation that will be added again to handle clobbering.
2445                                          * There are also some other issues that show up when running make testjit.
2446                                          */
2447                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2448                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2449                                         //g_assert (val >= 0);
2450                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2451                                 } else {
2452                                         //g_assert (val == -1); /* source cannot be spilled */
2453                                         val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2454                                         rs->iassign [ins->sreg1] = val;
2455                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2456                                 }
2457                                 if (spill) {
2458                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2459                                         insert_before_ins (ins, tmp, store);
2460                                 }
2461                         }
2462                         rs->isymbolic [val] = prev_sreg1;
2463                         ins->sreg1 = val;
2464                 } else {
2465                         prev_sreg1 = -1;
2466                 }
2467                 /* handle clobbering of sreg1 */
2468                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2469                         MonoInst *sreg2_copy = NULL;
2470
2471                         if (ins->dreg == ins->sreg2) {
2472                                 /* 
2473                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2474                                  * register for it.
2475                                  */
2476                                 int reg2 = mono_amd64_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2477
2478                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2479                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2480                                 prev_sreg2 = ins->sreg2 = reg2;
2481                         }
2482
2483                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2484                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2485                         insert_before_ins (ins, tmp, copy);
2486
2487                         if (sreg2_copy)
2488                                 insert_before_ins (copy, tmp, sreg2_copy);
2489
2490                         /* we set sreg1 to dest as well */
2491                         prev_sreg1 = ins->sreg1 = ins->dreg;
2492                         src2_mask &= ~ (1 << ins->dreg);
2493                 }
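                /*
                 * Sketch of the instruction order produced by the clobbering
                 * fixup above when dreg == sreg2 (register names illustrative):
                 *
                 *   mov reg2, sreg2     sreg2_copy: save the clobbered source
                 *   mov dreg, sreg1     copy: set up the two-address operand
                 *   op  dreg, reg2      the original ins, with sreg2 rewritten
                 */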
2494                 /* track sreg2 */
2495                 if (spec [MONO_INST_SRC2] == 'f') {
2496                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2497                                 MonoInst *load;
2498                                 MonoInst *store = NULL;
2499
2500                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2501                                         GList *spill_node;
2502
2503                                         spill_node = g_list_first (fspill_list);
2504                                         g_assert (spill_node);
2505                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2506                                                 spill_node = g_list_next (spill_node);
2507         
2508                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2509                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2510                                 } 
2511                                 
2512                                 fspill++;
2513                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2514                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2515                                 insert_before_ins (ins, tmp, load);
2516                                 if (store) 
2517                                         insert_before_ins (load, tmp, store);
2518                         }
2519                 } 
2520                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2521                         val = rs->iassign [ins->sreg2];
2522                         prev_sreg2 = ins->sreg2;
2523                         if (val < 0) {
2524                                 int spill = 0;
2525                                 if (val < -1) {
2526                                         /* the register gets spilled after this inst */
2527                                         spill = -val -1;
2528                                 }
2529                                 val = mono_amd64_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2530                                 rs->iassign [ins->sreg2] = val;
2531                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2532                                 if (spill)
2533                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2534                         }
2535                         rs->isymbolic [val] = prev_sreg2;
2536                         ins->sreg2 = val;
2537                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != AMD64_RCX) {
2538                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but RCX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [AMD64_RCX]));
2539                         }
2540                 } else {
2541                         prev_sreg2 = -1;
2542                 }
2543
2544                 if (spec [MONO_INST_CLOB] == 'c') {
2545                         int j, s;
2546                         MonoCallInst *call = (MonoCallInst*)ins;
2547                         GSList *list;
2548                         guint32 clob_mask = AMD64_CALLEE_REGS;
2549
2550                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2551                                 s = 1 << j;
2552                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2553                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [j]);
2554                                         mono_regstate_free_int (rs, j);
2555                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2556                                 }
2557                         }
2558
2559                         /* 
2560                          * Assign all registers in call->out_reg_args to the proper 
2561                          * argument registers.
2562                          */
2563
2564                         list = call->out_reg_args;
2565                         if (list) {
2566                                 while (list) {
2567                                         guint64 regpair;
2568                                         int reg, hreg;
2569
2570                                         regpair = (guint64) (list->data);
2571                                         hreg = regpair >> 32;
2572                                         reg = regpair & 0xffffffff;
2573
2574                                         rs->iassign [reg] = hreg;
2575                                         rs->isymbolic [hreg] = reg;
2576                                         rs->ifree_mask &= ~ (1 << hreg);
2577
2578                                         list = g_slist_next (list);
2579                                 }
2580                                 g_slist_free (call->out_reg_args);
2581                         }
2582                 }
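                /*
                 * The out_reg_args entries handled above are packed 64 bit
                 * values with the hard register in the upper half and the
                 * vreg in the lower half; e.g. a vreg 42 destined for %rdi
                 * would be encoded as ((guint64)AMD64_RDI << 32) | 42.
                 */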
2583
2584                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2585                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2586                         mono_regstate_free_int (rs, ins->sreg1);
2587                 }
2588                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2589                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2590                         mono_regstate_free_int (rs, ins->sreg2);
2591                 }*/
2592         
2593                 DEBUG (print_ins (i, ins));
2594                 /* this may result from an insert_before call */
2595                 if (!tmp->next)
2596                         bb->code = tmp->data;
2597                 tmp = tmp->next;
2598         }
2599
2600         g_free (reginfo);
2601         g_free (reginfof);
2602         g_list_free (fspill_list);
2603 }
2604
2605 static unsigned char*
2606 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2607 {
2608         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
2609         x86_fnstcw_membase(code, AMD64_RSP, 0);
2610         amd64_mov_reg_membase (code, dreg, AMD64_RSP, 0, 2);
2611         amd64_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2612         amd64_mov_membase_reg (code, AMD64_RSP, 2, dreg, 2);
2613         amd64_fldcw_membase (code, AMD64_RSP, 2);
2614         amd64_push_reg (code, AMD64_RAX); // SP = SP - 8
2615         amd64_fist_pop_membase (code, AMD64_RSP, 0, size == 8);
2616         amd64_pop_reg (code, dreg);
2617         amd64_fldcw_membase (code, AMD64_RSP, 0);
2618         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2619
2620         if (size == 1)
2621                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
2622         else if (size == 2)
2623                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
2624         return code;
2625 }
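/*
 * Note on emit_float_to_int above: OR-ing 0xc00 into the saved x87
 * control word sets both rounding-control bits (RC = 11b), i.e.
 * "round toward zero", which matches the truncating semantics of the
 * CIL conv.i* opcodes; the original control word is restored afterwards.
 * A rough C equivalent of the emitted sequence (illustrative only):
 *
 *   guint16 cw, cw_trunc;
 *   __asm__ ("fnstcw %0" : "=m" (cw));
 *   cw_trunc = cw | 0xc00;                   // RC = truncate
 *   __asm__ ("fldcw %0" : : "m" (cw_trunc));
 *   // fistp the value, then:
 *   __asm__ ("fldcw %0" : : "m" (cw));       // restore rounding mode
 */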
2626
2627 static unsigned char*
2628 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2629 {
2630         int sreg = tree->sreg1;
2631 #ifdef PLATFORM_WIN32
2632         guint8* br[5];
2633
2634         NOT_IMPLEMENTED;
2635
2636         /*
2637          * Under Windows:
2638          * If the requested stack size is larger than one page,
2639          * perform a stack-touch operation.
2640          */
2641         /*
2642          * Generate stack probe code.
2643          * Under Windows, it is necessary to allocate one page at a time,
2644          * "touching" stack after each successful sub-allocation. This is
2645          * because of the way stack growth is implemented - there is a
2646          * guard page before the lowest stack page that is currently committed.
2647          * Stack normally grows sequentially so OS traps access to the
2648          * guard page and commits more pages when needed.
2649          */
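        /*
         * Rough C sketch of the probe loop emitted below, where 'size'
         * stands for the requested allocation held in sreg:
         *
         *   while (size > 0xFFF) {
         *       rsp -= 0x1000;
         *       touch (rsp);       // fault the guard page in
         *       size -= 0x1000;
         *   }
         *   rsp -= size;
         */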
2650         amd64_test_reg_imm (code, sreg, ~0xFFF);
2651         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2652
2653         br[2] = code; /* loop */
2654         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
2655         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
2656         amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2657         amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2658         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2659         amd64_patch (br[3], br[2]);
2660         amd64_test_reg_reg (code, sreg, sreg);
2661         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2662         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2663
2664         br[1] = code; x86_jump8 (code, 0);
2665
2666         amd64_patch (br[0], code);
2667         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
2668         amd64_patch (br[1], code);
2669         amd64_patch (br[4], code);
2670 #else /* PLATFORM_WIN32 */
2671         amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
2672 #endif
2673         if (tree->flags & MONO_INST_INIT) {
2674                 int offset = 0;
2675                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
2676                         amd64_push_reg (code, AMD64_RAX);
2677                         offset += 8;
2678                 }
2679                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
2680                         amd64_push_reg (code, AMD64_RCX);
2681                         offset += 8;
2682                 }
2683                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
2684                         amd64_push_reg (code, AMD64_RDI);
2685                         offset += 8;
2686                 }
2687                 
2688                 amd64_shift_reg_imm (code, X86_SHR, sreg, 4);
2689                 if (sreg != AMD64_RCX)
2690                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
2691                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
2692                                 
2693                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
2694                 amd64_cld (code);
2695                 amd64_prefix (code, X86_REP_PREFIX);
2696                 amd64_stosl (code);
2697                 
2698                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
2699                         amd64_pop_reg (code, AMD64_RDI);
2700                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
2701                         amd64_pop_reg (code, AMD64_RCX);
2702                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
2703                         amd64_pop_reg (code, AMD64_RAX);
2704         }
2705         return code;
2706 }
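/*
 * When MONO_INST_INIT is set, mono_emit_stack_alloc zeroes the freshly
 * allocated area with a rep stos loop, conceptually:
 *
 *   memset (new_sp, 0, size);
 *
 * %rax, %rcx and %rdi are only saved around the loop when they are not
 * already the destination or size register of the localloc itself.
 */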
2707
2708 static guint8*
2709 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2710 {
2711         CallInfo *cinfo;
2712         guint32 offset, quad;
2713
2714         /* Move return value to the target register */
2715         /* FIXME: do this in the local reg allocator */
2716         switch (ins->opcode) {
2717         case CEE_CALL:
2718         case OP_CALL_REG:
2719         case OP_CALL_MEMBASE:
2720         case OP_LCALL:
2721         case OP_LCALL_REG:
2722         case OP_LCALL_MEMBASE:
2723                 if (ins->dreg != AMD64_RAX)
2724                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, 8);
2725                 break;
2726         case OP_FCALL:
2727         case OP_FCALL_REG:
2728         case OP_FCALL_MEMBASE:
2729                 /* FIXME: optimize this */
2730                 offset = mono_spillvar_offset_float (cfg, 0);
2731                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
2732                         amd64_movss_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2733                         amd64_fld_membase (code, AMD64_RBP, offset, FALSE);
2734                 }
2735                 else {
2736                         amd64_movsd_membase_reg (code, AMD64_RBP, offset, AMD64_XMM0);
2737                         amd64_fld_membase (code, AMD64_RBP, offset, TRUE);
2738                 }
2739                 break;
2740         case OP_VCALL:
2741         case OP_VCALL_REG:
2742         case OP_VCALL_MEMBASE:
2743                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2744                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2745                         /* Pop the destination address from the stack */
2746                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
2747                         amd64_pop_reg (code, AMD64_RCX);
2748                         
2749                         for (quad = 0; quad < 2; quad ++) {
2750                                 switch (cinfo->ret.pair_storage [quad]) {
2751                                 case ArgInIReg:
2752                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
2753                                         break;
2754                                 case ArgInFloatSSEReg:
2755                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2756                                         break;
2757                                 case ArgInDoubleSSEReg:
2758                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
2759                                         break;
2760                                 case ArgNone:
2761                                         break;
2762                                 default:
2763                                         NOT_IMPLEMENTED;
2764                                 }
2765                         }
2766                 }
2767                 break;
2768         }
2769
2770         return code;
2771 }
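/*
 * Illustrative example for the ArgValuetypeInReg case above: for a
 * hypothetical struct { double d; gint64 l; } return value the
 * classification would yield pair_storage = { ArgInDoubleSSEReg,
 * ArgInIReg }, so the two quads get stored through the popped
 * destination address roughly as:
 *
 *   movsd [rcx], xmm0
 *   mov   [rcx+8], rax
 */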
2772
2773 /*
2774  * emit_load_volatile_arguments:
2775  *
2776  *  Load volatile arguments from the stack to the original input registers.
2777  * Required before a tail call.
2778  */
2779 static guint8*
2780 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2781 {
2782         MonoMethod *method = cfg->method;
2783         MonoMethodSignature *sig;
2784         MonoInst *inst;
2785         CallInfo *cinfo;
2786         guint32 i;
2787
2788         /* FIXME: Generate intermediate code instead */
2789
2790         sig = method->signature;
2791
2792         cinfo = get_call_info (sig, FALSE);
2793         
2794         /* This is the opposite of the code in emit_prolog */
2795
2796         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2797                 ArgInfo *ainfo = cinfo->args + i;
2798                 MonoType *arg_type;
2799                 inst = cfg->varinfo [i];
2800
2801                 if (sig->hasthis && (i == 0))
2802                         arg_type = &mono_defaults.object_class->byval_arg;
2803                 else
2804                         arg_type = sig->params [i - sig->hasthis];
2805
2806                 if (inst->opcode != OP_REGVAR) {
2807                         switch (ainfo->storage) {
2808                         case ArgInIReg: {
2809                                 guint32 size = 8;
2810
2811                                 /* FIXME: I1 etc */
2812                                 amd64_mov_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset, size);
2813                                 break;
2814                         }
2815                         case ArgInFloatSSEReg:
2816                                 amd64_movss_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
2817                                 break;
2818                         case ArgInDoubleSSEReg:
2819                                 amd64_movsd_reg_membase (code, ainfo->reg, inst->inst_basereg, inst->inst_offset);
2820                                 break;
2821                         default:
2822                                 break;
2823                         }
2824                 }
2825         }
2826
2827         g_free (cinfo);
2828
2829         return code;
2830 }
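/*
 * Illustrative example (SysV argument order): for an instance method
 * long Foo (long a), 'this' was spilled in the prolog and is reloaded
 * into %rdi here and 'a' into %rsi, so a following tail call sees the
 * argument registers exactly as the original caller set them up.
 */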
2831
2832 #define REAL_PRINT_REG(text,reg) \
2833 mono_assert (reg >= 0); \
2834 amd64_push_reg (code, AMD64_RAX); \
2835 amd64_push_reg (code, AMD64_RDX); \
2836 amd64_push_reg (code, AMD64_RCX); \
2837 amd64_push_reg (code, reg); \
2838 amd64_push_imm (code, reg); \
2839 amd64_push_imm (code, text " %d %p\n"); \
2840 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
2841 amd64_call_reg (code, AMD64_RAX); \
2842 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); /* pop the three 8 byte pushes */ \
2843 amd64_pop_reg (code, AMD64_RCX); \
2844 amd64_pop_reg (code, AMD64_RDX); \
2845 amd64_pop_reg (code, AMD64_RAX);
2846
2847 /* benchmark and set based on cpu */
2848 #define LOOP_ALIGNMENT 8
2849 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2850
2851 void
2852 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2853 {
2854         MonoInst *ins;
2855         MonoCallInst *call;
2856         guint offset;
2857         guint8 *code = cfg->native_code + cfg->code_len;
2858         MonoInst *last_ins = NULL;
2859         guint last_offset = 0;
2860         int max_len, cpos;
2861
2862         if (cfg->opt & MONO_OPT_PEEPHOLE)
2863                 peephole_pass (cfg, bb);
2864
2865         if (cfg->opt & MONO_OPT_LOOP) {
2866                 int pad, align = LOOP_ALIGNMENT;
2867                 /* set alignment depending on cpu */
2868                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2869                         pad = align - pad;
2870                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2871                         amd64_padding (code, pad);
2872                         cfg->code_len += pad;
2873                         bb->native_offset = cfg->code_len;
2874                 }
2875         }
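        /*
         * Worked example of the padding arithmetic above: with
         * LOOP_ALIGNMENT == 8 and cfg->code_len == 0x1d, pad is first
         * 0x1d & 7 == 5, then 8 - 5 == 3, so three bytes of padding
         * move the loop header to the next 8 byte boundary at 0x20.
         */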
2876
2877         if (cfg->verbose_level > 2)
2878                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2879
2880         cpos = bb->max_offset;
2881
2882         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2883                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2884                 g_assert (!mono_compile_aot);
2885                 cpos += 6;
2886
2887                 cov->data [bb->dfn].cil_code = bb->cil_code;
2888                 /* this is not thread safe, but good enough */
2889                 amd64_inc_mem (code, (guint64)&cov->data [bb->dfn].count); 
2890         }
2891
2892         offset = code - cfg->native_code;
2893
2894         ins = bb->code;
2895         while (ins) {
2896                 offset = code - cfg->native_code;
2897
2898                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2899
2900                 if (offset > (cfg->code_size - max_len - 16)) {
2901                         cfg->code_size *= 2;
2902                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2903                         code = cfg->native_code + offset;
2904                         mono_jit_stats.code_reallocs++;
2905                 }
2906
2907                 mono_debug_record_line_number (cfg, ins, offset);
2908
2909                 switch (ins->opcode) {
2910                 case OP_BIGMUL:
2911                         amd64_mul_reg (code, ins->sreg2, TRUE);
2912                         break;
2913                 case OP_BIGMUL_UN:
2914                         amd64_mul_reg (code, ins->sreg2, FALSE);
2915                         break;
2916                 case OP_X86_SETEQ_MEMBASE:
2917                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2918                         break;
2919                 case OP_STOREI1_MEMBASE_IMM:
2920                         g_assert (amd64_is_imm32 (ins->inst_imm));
2921                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2922                         break;
2923                 case OP_STOREI2_MEMBASE_IMM:
2924                         g_assert (amd64_is_imm32 (ins->inst_imm));
2925                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2926                         break;
2927                 case OP_STOREI4_MEMBASE_IMM:
2928                         g_assert (amd64_is_imm32 (ins->inst_imm));
2929                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2930                         break;
2931                 case OP_STOREI1_MEMBASE_REG:
2932                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2933                         break;
2934                 case OP_STOREI2_MEMBASE_REG:
2935                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2936                         break;
2937                 case OP_STORE_MEMBASE_REG:
2938                 case OP_STOREI8_MEMBASE_REG:
2939                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
2940                         break;
2941                 case OP_STOREI4_MEMBASE_REG:
2942                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2943                         break;
2944                 case OP_STORE_MEMBASE_IMM:
2945                 case OP_STOREI8_MEMBASE_IMM:
2946                         if (amd64_is_imm32 (ins->inst_imm))
2947                                 amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
2948                         else {
2949                                 amd64_mov_reg_imm (code, GP_SCRATCH_REG, ins->inst_imm);
2950                                 amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, GP_SCRATCH_REG, 8);
2951                         }
2952                         break;
2953                 case CEE_LDIND_I:
2954                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, sizeof (gpointer));
2955                         break;
2956                 case CEE_LDIND_I4:
2957                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2958                         break;
2959                 case CEE_LDIND_U4:
2960                         amd64_mov_reg_mem (code, ins->dreg, (gssize)ins->inst_p0, 4);
2961                         break;
2962                 case OP_LOADU4_MEM:
2963                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2964                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2965                         break;
2966                 case OP_LOAD_MEMBASE:
2967                 case OP_LOADI8_MEMBASE:
2968                         if (amd64_is_imm32 (ins->inst_offset)) {
2969                                 amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
2970                         }
2971                         else {
2972                                 amd64_mov_reg_imm_size (code, GP_SCRATCH_REG, ins->inst_offset, 8);
2973                                 amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, GP_SCRATCH_REG, 0, 8);
2974                         }
2975                         break;
2976                 case OP_LOADI4_MEMBASE:
2977                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
2978                         break;
2979                 case OP_LOADU4_MEMBASE:
2980                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2981                         break;
2982                 case OP_LOADU1_MEMBASE:
2983                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2984                         break;
2985                 case OP_LOADI1_MEMBASE:
2986                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2987                         break;
2988                 case OP_LOADU2_MEMBASE:
2989                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2990                         break;
2991                 case OP_LOADI2_MEMBASE:
2992                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2993                         break;
2994                 case CEE_CONV_I1:
2995                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2996                         break;
2997                 case CEE_CONV_I2:
2998                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2999                         break;
3000                 case CEE_CONV_U1:
3001                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
3002                         break;
3003                 case CEE_CONV_U2:
3004                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
3005                         break;
3006                 case CEE_CONV_U8:
3007                 case CEE_CONV_U:
3008                         /* Clear out the upper 32 bits */
3009                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
3010                         break;
3011                 case CEE_CONV_I8:
3012                 case CEE_CONV_I:
3013                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
3014                         break;                  
3015                 case OP_COMPARE:
3016                 case OP_LCOMPARE:
3017                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3018                         break;
3019                 case OP_COMPARE_IMM:
3020                         if (!amd64_is_imm32 (ins->inst_imm)) {
3021                                 amd64_mov_reg_imm (code, AMD64_R11, ins->inst_imm);
3022                                 amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, AMD64_R11);
3023                         } else {
3024                                 amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
3025                         }
3026                         break;
3027                 case OP_X86_COMPARE_MEMBASE_REG:
3028                         amd64_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
3029                         break;
3030                 case OP_X86_COMPARE_MEMBASE_IMM:
3031                         g_assert (amd64_is_imm32 (ins->inst_imm));
3032                         amd64_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
3033                         break;
3034                 case OP_X86_COMPARE_REG_MEMBASE:
3035                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
3036                         break;
3037                 case OP_X86_TEST_NULL:
3038                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
3039                         break;
3040                 case OP_AMD64_TEST_NULL:
3041                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3042                         break;
3043                 case OP_X86_ADD_MEMBASE_IMM:
3044                         /* FIXME: Make a 64 bit version too */
3045                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3046                         break;
3047                 case OP_X86_ADD_MEMBASE:
3048                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3049                         break;
3050                 case OP_X86_SUB_MEMBASE_IMM:
3051                         g_assert (amd64_is_imm32 (ins->inst_imm));
3052                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3053                         break;
3054                 case OP_X86_SUB_MEMBASE:
3055                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3056                         break;
3057                 case OP_X86_INC_MEMBASE:
3058                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3059                         break;
3060                 case OP_X86_INC_REG:
3061                         amd64_inc_reg_size (code, ins->dreg, 4);
3062                         break;
3063                 case OP_X86_DEC_MEMBASE:
3064                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
3065                         break;
3066                 case OP_X86_DEC_REG:
3067                         amd64_dec_reg_size (code, ins->dreg, 4);
3068                         break;
3069                 case OP_X86_MUL_MEMBASE:
3070                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3071                         break;
3072                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
3073                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
3074                         break;
3075                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
3076                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
3077                         break;
3078                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
3079                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3080                         break;
3081                 case CEE_BREAK:
3082                         amd64_breakpoint (code);
3083                         break;
3084
3085                 case OP_ADDCC:
3086                 case CEE_ADD:
3087                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
3088                         break;
3089                 case OP_ADC:
3090                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
3091                         break;
3092                 case OP_ADD_IMM:
3093                         g_assert (amd64_is_imm32 (ins->inst_imm));
3094                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
3095                         break;
3096                 case OP_ADC_IMM:
3097                         g_assert (amd64_is_imm32 (ins->inst_imm));
3098                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
3099                         break;
3100                 case OP_SUBCC:
3101                 case CEE_SUB:
3102                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
3103                         break;
3104                 case OP_SBB:
3105                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
3106                         break;
3107                 case OP_SUB_IMM:
3108                         g_assert (amd64_is_imm32 (ins->inst_imm));
3109                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
3110                         break;
3111                 case OP_SBB_IMM:
3112                         g_assert (amd64_is_imm32 (ins->inst_imm));
3113                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
3114                         break;
3115                 case CEE_AND:
3116                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
3117                         break;
3118                 case OP_AND_IMM:
3119                         g_assert (amd64_is_imm32 (ins->inst_imm));
3120                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
3121                         break;
3122                 case CEE_MUL:
3123                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3124                         break;
3125                 case OP_MUL_IMM:
3126                         amd64_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3127                         break;
3128                 case CEE_DIV:
3129                         amd64_cdq (code);
3130                         amd64_div_reg (code, ins->sreg2, TRUE);
3131                         break;
3132                 case CEE_DIV_UN:
3133                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3134                         amd64_div_reg (code, ins->sreg2, FALSE);
3135                         break;
3136                 case OP_DIV_IMM:
3137                         g_assert (amd64_is_imm32 (ins->inst_imm));
3138                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3139                         amd64_cdq (code);
3140                         amd64_div_reg (code, ins->sreg2, TRUE);
3141                         break;
3142                 case CEE_REM:
3143                         amd64_cdq (code);
3144                         amd64_div_reg (code, ins->sreg2, TRUE);
3145                         break;
3146                 case CEE_REM_UN:
3147                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3148                         amd64_div_reg (code, ins->sreg2, FALSE);
3149                         break;
3150                 case OP_REM_IMM:
3151                         g_assert (amd64_is_imm32 (ins->inst_imm));
3152                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3153                         amd64_cdq (code);
3154                         amd64_div_reg (code, ins->sreg2, TRUE);
3155                         break;
3156                 case CEE_OR:
3157                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3158                         break;
3159                 case OP_OR_IMM:
3160                         g_assert (amd64_is_imm32 (ins->inst_imm));
3161                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3162                         break;
3163                 case CEE_XOR:
3164                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3165                         break;
3166                 case OP_XOR_IMM:
3167                         g_assert (amd64_is_imm32 (ins->inst_imm));
3168                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3169                         break;
3170                 case CEE_SHL:
3171                 case OP_LSHL:
3172                         g_assert (ins->sreg2 == AMD64_RCX);
3173                         amd64_shift_reg (code, X86_SHL, ins->dreg);
3174                         break;
3175                 case CEE_SHR:
3176                 case OP_LSHR:
3177                         g_assert (ins->sreg2 == AMD64_RCX);
3178                         amd64_shift_reg (code, X86_SAR, ins->dreg);
3179                         break;
3180                 case OP_SHR_IMM:
3181                         g_assert (amd64_is_imm32 (ins->inst_imm));
3182                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3183                         break;
3184                 case OP_LSHR_IMM:
3185                         g_assert (amd64_is_imm32 (ins->inst_imm));
3186                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3187                         break;
3188                 case OP_SHR_UN_IMM:
3189                         g_assert (amd64_is_imm32 (ins->inst_imm));
3190                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3191                         break;
3192                 case OP_LSHR_UN_IMM:
3193                         g_assert (amd64_is_imm32 (ins->inst_imm));
3194                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3195                         break;
3196                 case CEE_SHR_UN:
3197                         g_assert (ins->sreg2 == AMD64_RCX);
3198                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3199                         break;
3200                 case OP_LSHR_UN:
3201                         g_assert (ins->sreg2 == AMD64_RCX);
3202                         amd64_shift_reg (code, X86_SHR, ins->dreg);
3203                         break;
3204                 case OP_SHL_IMM:
3205                         g_assert (amd64_is_imm32 (ins->inst_imm));
3206                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3207                         break;
3208                 case OP_LSHL_IMM:
3209                         g_assert (amd64_is_imm32 (ins->inst_imm));
3210                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3211                         break;
3212
3213                 case OP_IADDCC:
3214                 case OP_IADD:
3215                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
3216                         break;
3217                 case OP_IADC:
3218                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
3219                         break;
3220                 case OP_IADD_IMM:
3221                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
3222                         break;
3223                 case OP_IADC_IMM:
3224                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
3225                         break;
3226                 case OP_ISUBCC:
3227                 case OP_ISUB:
3228                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
3229                         break;
3230                 case OP_ISBB:
3231                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
3232                         break;
3233                 case OP_ISUB_IMM:
3234                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
3235                         break;
3236                 case OP_ISBB_IMM:
3237                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
3238                         break;
3239                 case OP_IAND:
3240                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
3241                         break;
3242                 case OP_IAND_IMM:
3243                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
3244                         break;
3245                 case OP_IOR:
3246                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
3247                         break;
3248                 case OP_IOR_IMM:
3249                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
3250                         break;
3251                 case OP_IXOR:
3252                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
3253                         break;
3254                 case OP_IXOR_IMM:
3255                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
3256                         break;
3257                 case OP_INEG:
3258                         amd64_neg_reg_size (code, ins->sreg1, 4);
3259                         break;
3260                 case OP_INOT:
3261                         amd64_not_reg_size (code, ins->sreg1, 4);
3262                         break;
3263                 case OP_ISHL:
3264                         g_assert (ins->sreg2 == AMD64_RCX);
3265                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
3266                         break;
3267                 case OP_ISHR:
3268                         g_assert (ins->sreg2 == AMD64_RCX);
3269                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
3270                         break;
3271                 case OP_ISHR_IMM:
3272                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
3273                         break;
3274                 case OP_ISHR_UN_IMM:
3275                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
3276                         break;
3277                 case OP_ISHR_UN:
3278                         g_assert (ins->sreg2 == AMD64_RCX);
3279                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
3280                         break;
3281                 case OP_ISHL_IMM:
3282                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
3283                         break;
3284                 case OP_IMUL:
3285                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3286                         break;
3287                 case OP_IMUL_IMM:
3288                         amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, 4);
3289                         break;
3290                 case OP_IMUL_OVF:
3291                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
3292                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3293                         break;
3294                 case OP_IMUL_OVF_UN: {
3295                         /* the mul operation and the exception check should most likely be split */
3296                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3297                         /*g_assert (ins->sreg2 == X86_EAX);
3298                         g_assert (ins->dreg == X86_EAX);*/
3299                         if (ins->sreg2 == X86_EAX) {
3300                                 non_eax_reg = ins->sreg1;
3301                         } else if (ins->sreg1 == X86_EAX) {
3302                                 non_eax_reg = ins->sreg2;
3303                         } else {
3304                                 /* no need to save since we're going to store to it anyway */
3305                                 if (ins->dreg != X86_EAX) {
3306                                         saved_eax = TRUE;
3307                                         amd64_push_reg (code, X86_EAX);
3308                                 }
3309                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3310                                 non_eax_reg = ins->sreg2;
3311                         }
3312                         if (ins->dreg == X86_EDX) {
3313                                 if (!saved_eax) {
3314                                         saved_eax = TRUE;
3315                                         amd64_push_reg (code, X86_EAX);
3316                                 }
3317                         } else if (ins->dreg != X86_EAX) {
3318                                 saved_edx = TRUE;
3319                                 amd64_push_reg (code, X86_EDX);
3320                         }
3321                         amd64_mul_reg_size (code, non_eax_reg, FALSE, 4);
3322                         /* save before the check since pop and mov don't change the flags */
3323                         if (ins->dreg != X86_EAX)
3324                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3325                         if (saved_edx)
3326                                 amd64_pop_reg (code, X86_EDX);
3327                         if (saved_eax)
3328                                 amd64_pop_reg (code, X86_EAX);
3329                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3330                         break;
3331                 }
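                /*
                 * Worked trace of the save/restore logic above, assuming
                 * sreg1 = %r12, sreg2 = %r13, dreg = %r12: neither source is
                 * %rax, so %rax is pushed and sreg1 copied into it; dreg is
                 * neither %rax nor %rdx, so %rdx is pushed too.  After the
                 * 32 bit mul of %r13 the result is moved from %eax to %r12d,
                 * the saved registers are popped, and only then is the
                 * overflow flag tested, since pop and mov leave flags intact.
                 */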
3332                 case OP_IDIV:
3333                         amd64_cdq_size (code, 4);
3334                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3335                         break;
3336                 case OP_IDIV_UN:
3337                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3338                         amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
3339                         break;
3340                 case OP_IDIV_IMM:
3341                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3342                         amd64_cdq_size (code, 4);
3343                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3344                         break;
3345                 case OP_IREM:
3346                         amd64_cdq_size (code, 4);
3347                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3348                         break;
3349                 case OP_IREM_UN:
3350                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
3351                         amd64_div_reg_size (code, ins->sreg2, 4, FALSE);
3352                         break;
3353                 case OP_IREM_IMM:
3354                         amd64_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3355                         amd64_cdq_size (code, 4);
3356                         amd64_div_reg_size (code, ins->sreg2, 4, TRUE);
3357                         break;
3358
3359                 case OP_ICOMPARE:
3360                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
3361                         break;
3362                 case OP_ICOMPARE_IMM:
3363                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
3364                         break;
3365
3366                 case OP_IBEQ:
3367                 case OP_IBLT:
3368                 case OP_IBGT:
3369                 case OP_IBGE:
3370                 case OP_IBLE:
3371                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), TRUE);
3372                         break;
3373                 case OP_IBNE_UN:
3374                 case OP_IBLT_UN:
3375                 case OP_IBGT_UN:
3376                 case OP_IBGE_UN:
3377                 case OP_IBLE_UN:
3378                         EMIT_COND_BRANCH (ins, opcode_to_x86_cond (ins->opcode), FALSE);
3379                         break;
3380                 case OP_COND_EXC_IOV:
3381                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
3382                                                                                 TRUE, ins->inst_p1);
3383                         break;
3384                 case OP_COND_EXC_IC:
3385                         EMIT_COND_SYSTEM_EXCEPTION (opcode_to_x86_cond (ins->opcode),
3386                                                                                 FALSE, ins->inst_p1);
3387                         break;
3388                 case CEE_NOT:
3389                         amd64_not_reg (code, ins->sreg1);
3390                         break;
3391                 case CEE_NEG:
3392                         amd64_neg_reg (code, ins->sreg1);
3393                         break;
3394                 case OP_SEXT_I1:
3395                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3396                         break;
3397                 case OP_SEXT_I2:
3398                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3399                         break;
3400                 case OP_ICONST:
3401                 case OP_I8CONST:
3402                         if ((((guint64)ins->inst_c0) >> 32) == 0)
3403                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
3404                         else
3405                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
3406                         break;
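                        /*
                         * Size note: when the upper 32 bits of the constant
                         * are zero the 4 byte form is used, because 32 bit
                         * moves zero-extend into the full 64 bit register on
                         * amd64, e.g.:
                         *
                         *   mov eax, 0x1234      5 bytes, %rax == 0x1234
                         *   mov rax, 0x1234      10 byte imm64 form, avoided
                         */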
3407                 case OP_AOTCONST:
3408                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3409                         amd64_set_reg_template (code, ins->dreg);
3410                         break;
3411                 case CEE_CONV_I4:
3412                 case CEE_CONV_U4:
3413                 case OP_MOVE:
3414                 case OP_SETREG:
3415                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
3416                         break;
3417                 case OP_AMD64_SET_XMMREG_R4: {
3418                         /* FIXME: optimize this */
3419                         amd64_fst_membase (code, AMD64_RSP, -8, FALSE, TRUE);
3420                         /* ins->dreg is set to -1 by the reg allocator */
3421                         amd64_movss_reg_membase (code, ins->unused, AMD64_RSP, -8);
3422                         break;
3423                 }
3424                 case OP_AMD64_SET_XMMREG_R8: {
3425                         /* FIXME: optimize this */
3426                         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE);
3427                         /* ins->dreg is set to -1 by the reg allocator */
3428                         amd64_movsd_reg_membase (code, ins->unused, AMD64_RSP, -8);
3429                         break;
3430                 }
3431                 case CEE_JMP: {
3432                         /*
3433                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3434                          * Keep in sync with the code in emit_epilog.
3435                          */
3436                         int pos = 0, i;
3437
3438                         /* FIXME: no tracing support... */
3439                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3440                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3441
3442                         g_assert (!cfg->method->save_lmf);
3443
3444                         code = emit_load_volatile_arguments (cfg, code);
3445
3446                         for (i = 0; i < AMD64_NREG; ++i)
3447                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
3448                                         pos -= sizeof (gpointer);
3449                         
3450                         if (pos)
3451                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
3452
3453                         /* Pop registers in reverse order */
3454                         for (i = AMD64_NREG - 1; i > 0; --i)
3455                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
3456                                         amd64_pop_reg (code, i);
3457                                 }
3458
3459                         amd64_leave (code);
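                             /* The jump target is unknown at this point, so emit a
                              * mov imm64 template into %r11 and register a patch;
                              * the patching code later rewrites the 64 bit immediate
                              * with the real method address before jumping through
                              * the register. */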
3460                         offset = code - cfg->native_code;
3461                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3462                         amd64_set_reg_template (code, AMD64_R11);
3463                         amd64_jump_reg (code, AMD64_R11);
3464                         break;
3465                 }
3466                 case OP_CHECK_THIS:
3467                         /* ensure ins->sreg1 is not NULL */
3468                         amd64_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
3469                         break;
3470                 case OP_ARGLIST: {
3471                         amd64_lea_membase (code, AMD64_R11, AMD64_RBP, cfg->sig_cookie);
3472                         amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8);
3473                         break;
3474                 }
3475                 case OP_FCALL:
3476                 case OP_LCALL:
3477                 case OP_VCALL:
3478                 case OP_VOIDCALL:
3479                 case CEE_CALL:
3480                         call = (MonoCallInst*)ins;
3481                         /*
3482                          * The AMD64 ABI forces callers of vararg functions to pass the number of vector registers used in %al, so %rax is zeroed here.
3483                          */
3484                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
3485                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3486
3487                         if (ins->flags & MONO_INST_HAS_METHOD)
3488                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3489                         else
3490                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3491                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3492                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3493                         code = emit_move_return_value (cfg, ins, code);
3494                         break;
3495                 case OP_FCALL_REG:
3496                 case OP_LCALL_REG:
3497                 case OP_VCALL_REG:
3498                 case OP_VOIDCALL_REG:
3499                 case OP_CALL_REG:
3500                         call = (MonoCallInst*)ins;
3501
3502                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3503                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3504                                 ins->sreg1 = AMD64_R11;
3505                         }
3506
3507                         /*
3508                          * The AMD64 ABI forces callers of vararg functions to pass the number of vector registers used in %al, so %rax is zeroed here.
3509                          */
3510                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
3511                                 if (ins->sreg1 == AMD64_RAX) {
3512                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
3513                                         ins->sreg1 = AMD64_R11;
3514                                 }
3515                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3516                         }
3517                         amd64_call_reg (code, ins->sreg1);
3518                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3519                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3520                         code = emit_move_return_value (cfg, ins, code);
3521                         break;
3522                 case OP_FCALL_MEMBASE:
3523                 case OP_LCALL_MEMBASE:
3524                 case OP_VCALL_MEMBASE:
3525                 case OP_VOIDCALL_MEMBASE:
3526                 case OP_CALL_MEMBASE:
3527                         call = (MonoCallInst*)ins;
3528
3529                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
3530                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
3531                                 ins->sreg1 = AMD64_R11;
3532                         }
3533
3534                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
3535                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
3536                                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
3537                         code = emit_move_return_value (cfg, ins, code);
3538                         break;
3539                 case OP_OUTARG:
3540                 case OP_X86_PUSH:
3541                         amd64_push_reg (code, ins->sreg1);
3542                         break;
3543                 case OP_X86_PUSH_IMM:
3544                         g_assert (amd64_is_imm32 (ins->inst_imm));
3545                         amd64_push_imm (code, ins->inst_imm);
3546                         break;
3547                 case OP_X86_PUSH_MEMBASE:
3548                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
3549                         break;
3550                 case OP_X86_PUSH_OBJ: 
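                             /* Push a valuetype: reserve inst_imm bytes on the
                              * stack, then copy the object into the reserved area
                              * with a rep string move, preserving the registers
                              * the string instructions clobber (%rdi, %rsi, %rcx). */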
3551                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ins->inst_imm);
3552                         amd64_push_reg (code, AMD64_RDI);
3553                         amd64_push_reg (code, AMD64_RSI);
3554                         amd64_push_reg (code, AMD64_RCX);
3555                         if (ins->inst_offset)
3556                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
3557                         else
3558                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
3559                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, 3 * 8);
3560                         amd64_mov_reg_imm (code, AMD64_RCX, (ins->inst_imm >> 3));
3561                         amd64_cld (code);
3562                         amd64_prefix (code, X86_REP_PREFIX);
3563                         amd64_movsd (code);
3564                         amd64_pop_reg (code, AMD64_RCX);
3565                         amd64_pop_reg (code, AMD64_RSI);
3566                         amd64_pop_reg (code, AMD64_RDI);
3567                         break;
3568                 case OP_X86_LEA:
3569                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3570                         break;
3571                 case OP_X86_LEA_MEMBASE:
3572                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3573                         break;
3574                 case OP_X86_XCHG:
3575                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3576                         break;
3577                 case OP_LOCALLOC:
3578                         /* keep alignment */
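                             /* E.g. with a 16 byte frame alignment the size is
                              * rounded up as (size + 15) & ~15, keeping %rsp
                              * aligned after the allocation. */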
3579                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3580                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3581                         code = mono_emit_stack_alloc (code, ins);
3582                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
3583                         break;
3584                 case CEE_RET:
3585                         amd64_ret (code);
3586                         break;
3587                 case CEE_THROW: {
3588                         amd64_mov_reg_reg (code, AMD64_RDI, ins->sreg1, 8);
3589                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3590                                              (gpointer)"mono_arch_throw_exception");
3591                         break;
3592                 }
3593                 case OP_CALL_HANDLER: 
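                             /* The call below pushes an 8 byte return address, so an
                              * extra 8 byte adjustment keeps %rsp 16 byte aligned
                              * inside the handler (assuming it was aligned here). */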
3594                         /* Align stack */
3595                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
3596                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3597                         amd64_call_imm (code, 0);
3598                         /* Restore stack alignment */
3599                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3600                         break;
3601                 case OP_LABEL:
3602                         ins->inst_c0 = code - cfg->native_code;
3603                         break;
3604                 case CEE_BR:
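                             /* Forward branches are emitted as the short 2 byte
                              * form when the displacement estimated from max_offset
                              * fits in a signed byte, otherwise as the 5 byte near
                              * form; both are patched with the real target later. */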
3605                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3606                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3607                         //break;
3608                         if (ins->flags & MONO_INST_BRLABEL) {
3609                                 if (ins->inst_i0->inst_c0) {
3610                                         amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3611                                 } else {
3612                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3613                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3614                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3615                                                 x86_jump8 (code, 0);
3616                                         else 
3617                                                 x86_jump32 (code, 0);
3618                                 }
3619                         } else {
3620                                 if (ins->inst_target_bb->native_offset) {
3621                                         amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3622                                 } else {
3623                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3624                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3625                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3626                                                 x86_jump8 (code, 0);
3627                                         else 
3628                                                 x86_jump32 (code, 0);
3629                                 } 
3630                         }
3631                         break;
3632                 case OP_BR_REG:
3633                         amd64_jump_reg (code, ins->sreg1);
3634                         break;
3635                 case OP_CEQ:
3636                 case OP_ICEQ:
3637                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3638                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3639                         break;
3640                 case OP_CLT:
3641                 case OP_ICLT:
3642                         amd64_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3643                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3644                         break;
3645                 case OP_CLT_UN:
3646                 case OP_ICLT_UN:
3647                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3648                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3649                         break;
3650                 case OP_CGT:
3651                 case OP_ICGT:
3652                         amd64_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3653                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3654                         break;
3655                 case OP_CGT_UN:
3656                 case OP_ICGT_UN:
3657                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3658                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3659                         break;
3660                 case OP_COND_EXC_EQ:
3661                 case OP_COND_EXC_NE_UN:
3662                 case OP_COND_EXC_LT:
3663                 case OP_COND_EXC_LT_UN:
3664                 case OP_COND_EXC_GT:
3665                 case OP_COND_EXC_GT_UN:
3666                 case OP_COND_EXC_GE:
3667                 case OP_COND_EXC_GE_UN:
3668                 case OP_COND_EXC_LE:
3669                 case OP_COND_EXC_LE_UN:
3670                 case OP_COND_EXC_OV:
3671                 case OP_COND_EXC_NO:
3672                 case OP_COND_EXC_C:
3673                 case OP_COND_EXC_NC:
3674                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3675                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3676                         break;
3677                 case CEE_BEQ:
3678                 case CEE_BNE_UN:
3679                 case CEE_BLT:
3680                 case CEE_BLT_UN:
3681                 case CEE_BGT:
3682                 case CEE_BGT_UN:
3683                 case CEE_BGE:
3684                 case CEE_BGE_UN:
3685                 case CEE_BLE:
3686                 case CEE_BLE_UN:
3687                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3688                         break;
3689
3690                 /* floating point opcodes */
3691                 case OP_R8CONST: {
3692                         double d = *(double *)ins->inst_p0;
3693
3694                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3695                                 amd64_fldz (code);
3696                         } else if (d == 1.0) {
3697                                 x86_fld1 (code);
3698                         } else {
3699                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
3700                                 amd64_fld_membase (code, AMD64_RIP, 0, TRUE);
3701                         }
3702                         break;
3703                 }
3704                 case OP_R4CONST: {
3705                         float f = *(float *)ins->inst_p0;
3706
3707                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3708                                 amd64_fldz (code);
3709                         } else if (f == 1.0) {
3710                                 x86_fld1 (code);
3711                         } else {
3712                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
3713                                 amd64_fld_membase (code, AMD64_RIP, 0, FALSE);
3714                         }
3715                         break;
3716                 }
3717                 case OP_STORER8_MEMBASE_REG:
3718                         amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3719                         break;
3720                 case OP_LOADR8_SPILL_MEMBASE:
3721                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3722                         amd64_fxch (code, 1);
3723                         break;
3724                 case OP_LOADR8_MEMBASE:
3725                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3726                         break;
3727                 case OP_STORER4_MEMBASE_REG:
3728                         amd64_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3729                         break;
3730                 case OP_LOADR4_MEMBASE:
3731                         amd64_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3732                         break;
3733                 case CEE_CONV_R4: /* FIXME: change precision */
3734                 case CEE_CONV_R8:
3735                         amd64_push_reg (code, ins->sreg1);
3736                         amd64_fild_membase (code, AMD64_RSP, 0, FALSE);
3737                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3738                         break;
3739                 case CEE_CONV_R_UN:
3740                         /* Emulated */
3741                         g_assert_not_reached ();
3742                         break;
3743                 case OP_LCONV_TO_R4: /* FIXME: change precision */
3744                 case OP_LCONV_TO_R8:
3745                         amd64_push_reg (code, ins->sreg1);
3746                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3747                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
3748                         break;
3749                 case OP_X86_FP_LOAD_I8:
3750                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3751                         break;
3752                 case OP_X86_FP_LOAD_I4:
3753                         amd64_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3754                         break;
3755                 case OP_FCONV_TO_I1:
3756                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3757                         break;
3758                 case OP_FCONV_TO_U1:
3759                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3760                         break;
3761                 case OP_FCONV_TO_I2:
3762                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3763                         break;
3764                 case OP_FCONV_TO_U2:
3765                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3766                         break;
3767                 case OP_FCONV_TO_I4:
3768                 case OP_FCONV_TO_I:
3769                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3770                         break;
3771                 case OP_FCONV_TO_I8:
3772                         code = emit_float_to_int (cfg, code, ins->dreg, 8, TRUE);
3773                         break;
3774                 case OP_LCONV_TO_R_UN: { 
3775                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
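                             /* mn is 2^64 encoded as an 80 bit extended double
                              * (exponent 0x403f, significand 0x8000000000000000).
                              * fild loads the source as a signed integer, so a
                              * value with the sign bit set comes in 2^64 too low
                              * and the constant is added back below. */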
3776                         guint8 *br;
3777
3778                         /* load 64bit integer to FP stack */
3779                         amd64_push_imm (code, 0);
3780                         amd64_push_reg (code, ins->sreg2);
3781                         amd64_push_reg (code, ins->sreg1);
3782                         amd64_fild_membase (code, AMD64_RSP, 0, TRUE);
3783                         /* store as 80bit FP value */
3784                         x86_fst80_membase (code, AMD64_RSP, 0);
3785                         
3786                         /* test if lreg is negative */
3787                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3788                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3789         
3790                         /* add correction constant mn */
3791                         x86_fld80_mem (code, mn);
3792                         x86_fld80_membase (code, AMD64_RSP, 0);
3793                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3794                         x86_fst80_membase (code, AMD64_RSP, 0);
3795
3796                         amd64_patch (br, code);
3797
3798                         x86_fld80_membase (code, AMD64_RSP, 0);
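                             /* FIXME: each of the three pushes above grows the
                              * stack by 8 bytes on amd64, so this cleanup
                              * (inherited from x86) probably needs to be 24. */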
3799                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 12);
3800
3801                         break;
3802                 }
3803                 case OP_LCONV_TO_OVF_I: {
3804                         guint8 *br [3], *label [1];
3805
3806                         /* 
3807                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3808                          */
3809                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3810
3811                         /* If the low word top bit is set, see if we are negative */
3812                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3813                         /* We are not negative (no top bit set); check that our top word is zero */
3814                         amd64_test_reg_reg (code, ins->sreg2, ins->sreg2);
3815                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3816                         label [0] = code;
3817
3818                         /* throw exception */
3819                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3820                         x86_jump32 (code, 0);
3821         
3822                         amd64_patch (br [0], code);
3823                         /* our top bit is set, check that the top word is 0xffffffff */
3824                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3825                 
3826                         amd64_patch (br [1], code);
3827                         /* nope, emit exception */
3828                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3829                         amd64_patch (br [2], label [0]);
3830
3831                         if (ins->dreg != ins->sreg1)
3832                                 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3833                         break;
3834                 }
3835                 case CEE_CONV_OVF_U4:
3836                         /* FIXME: */
3837                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
3838                         break;
3839                 case OP_FADD:
3840                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3841                         break;
3842                 case OP_FSUB:
3843                         amd64_fp_op_reg (code, X86_FSUB, 1, TRUE);
3844                         break;          
3845                 case OP_FMUL:
3846                         amd64_fp_op_reg (code, X86_FMUL, 1, TRUE);
3847                         break;          
3848                 case OP_FDIV:
3849                         amd64_fp_op_reg (code, X86_FDIV, 1, TRUE);
3850                         break;          
3851                 case OP_FNEG:
3852                         amd64_fchs (code);
3853                         break;          
3854                 case OP_SIN:
3855                         amd64_fsin (code);
3856                         amd64_fldz (code);
3857                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3858                         break;          
3859                 case OP_COS:
3860                         amd64_fcos (code);
3861                         amd64_fldz (code);
3862                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3863                         break;          
3864                 case OP_ABS:
3865                         amd64_fabs (code);
3866                         break;          
3867                 case OP_TAN: {
3868                         /* 
3869                          * it really doesn't make sense to inline all this code,
3870                          * it's here just to show that things may not be as simple 
3871                          * as they appear.
3872                          */
3873                         guchar *check_pos, *end_tan, *pop_jump;
3874                         amd64_push_reg (code, AMD64_RAX);
3875                         amd64_fptan (code);
3876                         amd64_fnstsw (code);
3877                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3878                         check_pos = code;
3879                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3880                         amd64_fstp (code, 0); /* pop the 1.0 */
3881                         end_tan = code;
3882                         x86_jump8 (code, 0);
3883                         amd64_fldpi (code);
3884                         amd64_fp_op (code, X86_FADD, 0);
3885                         amd64_fxch (code, 1);
3886                         x86_fprem1 (code);
3887                         amd64_fstsw (code);
3888                         amd64_test_reg_imm (code, AMD64_RAX, X86_FP_C2);
3889                         pop_jump = code;
3890                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3891                         amd64_fstp (code, 1);
3892                         amd64_fptan (code);
3893                         amd64_patch (pop_jump, code);
3894                         amd64_fstp (code, 0); /* pop the 1.0 */
3895                         amd64_patch (check_pos, code);
3896                         amd64_patch (end_tan, code);
3897                         amd64_fldz (code);
3898                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3899                         amd64_pop_reg (code, AMD64_RAX);
3900                         break;
3901                 }
3902                 case OP_ATAN:
3903                         x86_fld1 (code);
3904                         amd64_fpatan (code);
3905                         amd64_fldz (code);
3906                         amd64_fp_op_reg (code, X86_FADD, 1, TRUE);
3907                         break;          
3908                 case OP_SQRT:
3909                         amd64_fsqrt (code);
3910                         break;          
3911                 case OP_X86_FPOP:
3912                         amd64_fstp (code, 0);
3913                         break;          
3914                 case OP_FREM: {
3915                         guint8 *l1, *l2;
3916
3917                         amd64_push_reg (code, AMD64_RAX);
3918                         /* we need to exchange ST(0) with ST(1) */
3919                         amd64_fxch (code, 1);
3920
3921                         /* this requires a loop, because fprem sometimes
3922                          * returns a partial remainder and sets C2 */
3923                         l1 = code;
3924                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3925                         /* x86_fprem1 (code); */
3926                         amd64_fprem (code);
3927                         amd64_fnstsw (code);
3928                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_C2);
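                             /* The 2 byte short branch is displacement relative to
                              * its own end, hence the code + 2 when computing the
                              * backwards offset to l1. */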
3929                         l2 = code + 2;
3930                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3931
3932                         /* pop result */
3933                         amd64_fstp (code, 1);
3934
3935                         amd64_pop_reg (code, AMD64_RAX);
3936                         break;
3937                 }
3938                 case OP_FCOMPARE:
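                             /* fcomip compares ST(0) with ST(1), sets ZF/PF/CF
                              * directly (PF signals an unordered result) and pops
                              * once; the following fstp pops the second operand. */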
3939                         if (cfg->opt & MONO_OPT_FCMOV) {
3940                                 amd64_fcomip (code, 1);
3941                                 amd64_fstp (code, 0);
3942                                 break;
3943                         }
3944                         /* this overwrites %rax */
3945                         EMIT_FPCOMPARE(code);
3946                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3947                         break;
3948                 case OP_FCEQ:
3949                         if (cfg->opt & MONO_OPT_FCMOV) {
3950                                 /* zeroing the register at the start results in 
3951                                  * shorter and faster code (we can also remove the widening op)
3952                                  */
3953                                 guchar *unordered_check;
3954                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3955                                 amd64_fcomip (code, 1);
3956                                 amd64_fstp (code, 0);
3957                                 unordered_check = code;
3958                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3959                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3960                                 amd64_patch (unordered_check, code);
3961                                 break;
3962                         }
3963                         if (ins->dreg != AMD64_RAX) 
3964                                 amd64_push_reg (code, AMD64_RAX);
3965
3966                         EMIT_FPCOMPARE(code);
3967                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
3968                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
3969                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3970                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3971
3972                         if (ins->dreg != AMD64_RAX) 
3973                                 amd64_pop_reg (code, AMD64_RAX);
3974                         break;
3975                 case OP_FCLT:
3976                 case OP_FCLT_UN:
3977                         if (cfg->opt & MONO_OPT_FCMOV) {
3978                                 /* zeroing the register at the start results in 
3979                                  * shorter and faster code (we can also remove the widening op)
3980                                  */
3981                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3982                                 amd64_fcomip (code, 1);
3983                                 amd64_fstp (code, 0);
3984                                 if (ins->opcode == OP_FCLT_UN) {
3985                                         guchar *unordered_check = code;
3986                                         guchar *jump_to_end;
3987                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3988                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3989                                         jump_to_end = code;
3990                                         x86_jump8 (code, 0);
3991                                         amd64_patch (unordered_check, code);
3992                                         amd64_inc_reg (code, ins->dreg);
3993                                         amd64_patch (jump_to_end, code);
3994                                 } else {
3995                                         amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3996                                 }
3997                                 break;
3998                         }
3999                         if (ins->dreg != AMD64_RAX) 
4000                                 amd64_push_reg (code, AMD64_RAX);
4001
4002                         EMIT_FPCOMPARE(code);
4003                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
4004                         if (ins->opcode == OP_FCLT_UN) {
4005                                 guchar *is_not_zero_check, *end_jump;
4006                                 is_not_zero_check = code;
4007                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4008                                 end_jump = code;
4009                                 x86_jump8 (code, 0);
4010                                 amd64_patch (is_not_zero_check, code);
4011                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4012
4013                                 amd64_patch (end_jump, code);
4014                         }
4015                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
4016                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4017
4018                         if (ins->dreg != AMD64_RAX) 
4019                                 amd64_pop_reg (code, AMD64_RAX);
4020                         break;
4021                 case OP_FCGT:
4022                 case OP_FCGT_UN:
4023                         if (cfg->opt & MONO_OPT_FCMOV) {
4024                                 /* zeroing the register at the start results in 
4025                                  * shorter and faster code (we can also remove the widening op)
4026                                  */
4027                                 guchar *unordered_check;
4028                                 amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4029                                 amd64_fcomip (code, 1);
4030                                 amd64_fstp (code, 0);
4031                                 if (ins->opcode == OP_FCGT) {
4032                                         unordered_check = code;
4033                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
4034                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4035                                         amd64_patch (unordered_check, code);
4036                                 } else {
4037                                         amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
4038                                 }
4039                                 break;
4040                         }
4041                         if (ins->dreg != AMD64_RAX) 
4042                                 amd64_push_reg (code, AMD64_RAX);
4043
4044                         EMIT_FPCOMPARE(code);
4045                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, X86_FP_CC_MASK);
4046                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4047                         if (ins->opcode == OP_FCGT_UN) {
4048                                 guchar *is_not_zero_check, *end_jump;
4049                                 is_not_zero_check = code;
4050                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4051                                 end_jump = code;
4052                                 x86_jump8 (code, 0);
4053                                 amd64_patch (is_not_zero_check, code);
4054                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4055
4056                                 amd64_patch (end_jump, code);
4057                         }
4058                         amd64_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
4059                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4060
4061                         if (ins->dreg != AMD64_RAX) 
4062                                 amd64_pop_reg (code, AMD64_RAX);
4063                         break;
4064                 case OP_FBEQ:
4065                         if (cfg->opt & MONO_OPT_FCMOV) {
4066                                 guchar *jump = code;
4067                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
4068                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4069                                 amd64_patch (jump, code);
4070                                 break;
4071                         }
4072                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0x4000);
4073                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
4074                         break;
4075                 case OP_FBNE_UN:
4076                         /* Branch if C013 != 100 */
4077                         if (cfg->opt & MONO_OPT_FCMOV) {
4078                                 /* branch if !ZF or (PF|CF) */
4079                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4080                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4081                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
4082                                 break;
4083                         }
4084                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
4085                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4086                         break;
4087                 case OP_FBLT:
4088                         if (cfg->opt & MONO_OPT_FCMOV) {
4089                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4090                                 break;
4091                         }
4092                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4093                         break;
4094                 case OP_FBLT_UN:
4095                         if (cfg->opt & MONO_OPT_FCMOV) {
4096                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4097                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
4098                                 break;
4099                         }
4100                         if (ins->opcode == OP_FBLT_UN) {
4101                                 guchar *is_not_zero_check, *end_jump;
4102                                 is_not_zero_check = code;
4103                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4104                                 end_jump = code;
4105                                 x86_jump8 (code, 0);
4106                                 amd64_patch (is_not_zero_check, code);
4107                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4108
4109                                 amd64_patch (end_jump, code);
4110                         }
4111                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4112                         break;
4113                 case OP_FBGT:
4114                 case OP_FBGT_UN:
4115                         if (cfg->opt & MONO_OPT_FCMOV) {
4116                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
4117                                 break;
4118                         }
4119                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4120                         if (ins->opcode == OP_FBGT_UN) {
4121                                 guchar *is_not_zero_check, *end_jump;
4122                                 is_not_zero_check = code;
4123                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
4124                                 end_jump = code;
4125                                 x86_jump8 (code, 0);
4126                                 amd64_patch (is_not_zero_check, code);
4127                                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_CC_MASK);
4128
4129                                 amd64_patch (end_jump, code);
4130                         }
4131                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4132                         break;
4133                 case OP_FBGE:
4134                         /* Branch if C013 == 100 or 001 */
4135                         if (cfg->opt & MONO_OPT_FCMOV) {
4136                                 guchar *br1;
4137
4138                                 /* skip branch if C1=1 */
4139                                 br1 = code;
4140                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4141                                 /* branch if (C0 | C3) = 1 */
4142                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
4143                                 amd64_patch (br1, code);
4144                                 break;
4145                         }
4146                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4147                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4148                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C3);
4149                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4150                         break;
4151                 case OP_FBGE_UN:
4152                         /* Branch if C013 == 000 */
4153                         if (cfg->opt & MONO_OPT_FCMOV) {
4154                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
4155                                 break;
4156                         }
4157                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4158                         break;
4159                 case OP_FBLE:
4160                         /* Branch if C013 == 000 or 100 */
4161                         if (cfg->opt & MONO_OPT_FCMOV) {
4162                                 guchar *br1;
4163
4164                                 /* skip branch if C1=1 */
4165                                 br1 = code;
4166                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4167                                 /* branch if C0=0 */
4168                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
4169                                 amd64_patch (br1, code);
4170                                 break;
4171                         }
4172                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, (X86_FP_C0|X86_FP_C1));
4173                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4174                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4175                         break;
4176                 case OP_FBLE_UN:
4177                         /* Branch if C013 != 001 */
4178                         if (cfg->opt & MONO_OPT_FCMOV) {
4179                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4180                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4181                                 break;
4182                         }
4183                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4184                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4185                         break;
4186                 case CEE_CKFINITE: {
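                             /* fxam classifies ST(0) via C3/C2/C0; masking the
                              * status word with C3|C0 (0x4100) yields exactly
                              * X86_FP_C0 for NaN and infinity, which is the
                              * non-finite case the exception check below catches. */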
4187                         amd64_push_reg (code, AMD64_RAX);
4188                         amd64_fxam (code);
4189                         amd64_fnstsw (code);
4190                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
4191                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
4192                         amd64_pop_reg (code, AMD64_RAX);
4193                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4194                         break;
4195                 }
4196                 case OP_X86_TLS_GET: {
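                             /* Reads a thread local slot at a fixed offset from the
                              * FS segment base, where the thread descriptor lives
                              * on amd64 Linux. */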
4197                         x86_prefix (code, X86_FS_PREFIX);
4198                         amd64_mov_reg_mem (code, ins->dreg, ins->inst_offset, 8);
4199                         break;
4200                 }
4201                 default:
4202                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4203                         g_assert_not_reached ();
4204                 }
4205
4206                 if ((code - cfg->native_code - offset) > max_len) {
4207                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
4208                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4209                         g_assert_not_reached ();
4210                 }
4211                
4212                 cpos += max_len;
4213
4214                 last_ins = ins;
4215                 last_offset = offset;
4216                 
4217                 ins = ins->next;
4218         }
4219
4220         cfg->code_len = code - cfg->native_code;
4221 }
4222
4223 void
4224 mono_arch_register_lowlevel_calls (void)
4225 {
4226 }
4227
4228 void
4229 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4230 {
4231         MonoJumpInfo *patch_info;
4232
4233         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4234                 unsigned char *ip = patch_info->ip.i + code;
4235                 const unsigned char *target;
4236
4237                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4238
4239                 switch (patch_info->type) {
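                     /* Most of the cases below rewrite the 64 bit immediate of a
                      * mov reg, imm64 template; the immediate starts 2 bytes into
                      * the instruction (REX prefix + opcode), hence the ip + 2. */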
4240                 case MONO_PATCH_INFO_METHOD_REL:
4241                 case MONO_PATCH_INFO_METHOD_JUMP:
4242                         *((gconstpointer *)(ip + 2)) = target;
4243                         continue;
4244                 case MONO_PATCH_INFO_SWITCH: {
4245                         *((gconstpointer *)(ip + 2)) = target;
4246                         continue;
4247                 }
4248                 case MONO_PATCH_INFO_IID:
4249                         *((guint32 *)(ip + 2)) = (guint32)(guint64)target;
4250                         continue;                       
4251                 case MONO_PATCH_INFO_CLASS_INIT: {
4252                         /* FIXME: Might already have been changed to a nop */
4253                         *((gconstpointer *)(ip + 2)) = target;
4254                         continue;
4255                 }
4256                 case MONO_PATCH_INFO_R8:
4257                 case MONO_PATCH_INFO_R4:
4258                         g_assert_not_reached ();
4259                         continue;
4260                 case MONO_PATCH_INFO_METHODCONST:
4261                 case MONO_PATCH_INFO_CLASS:
4262                 case MONO_PATCH_INFO_IMAGE:
4263                 case MONO_PATCH_INFO_FIELD:
4264                 case MONO_PATCH_INFO_VTABLE:
4265                 case MONO_PATCH_INFO_SFLDA:
4266                 case MONO_PATCH_INFO_EXC_NAME:
4267                 case MONO_PATCH_INFO_LDSTR:
4268                 case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
4269                 case MONO_PATCH_INFO_LDTOKEN:
4270                 case MONO_PATCH_INFO_IP:
4271                         *((gconstpointer *)(ip + 2)) = target;
4272                         continue;
4273                 case MONO_PATCH_INFO_METHOD:
4274                         *((gconstpointer *)(ip + 2)) = target;
4275                         continue;
4276                 case MONO_PATCH_INFO_ABS:
4277                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4278                         break;
4279                 default:
4280                         break;
4281                 }
4282                 amd64_patch (ip, (gpointer)target);
4283         }
4284 }
4285
4286 guint8 *
4287 mono_arch_emit_prolog (MonoCompile *cfg)
4288 {
4289         MonoMethod *method = cfg->method;
4290         MonoBasicBlock *bb;
4291         MonoMethodSignature *sig;
4292         MonoInst *inst;
4293         int alloc_size, pos, max_offset, i;
4294         guint8 *code;
4295         CallInfo *cinfo;
4296
4297         cfg->code_size =  MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
4298         code = cfg->native_code = g_malloc (cfg->code_size);
4299
4300         amd64_push_reg (code, AMD64_RBP);
4301         amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof (gpointer));
4302
4303         /* Stack alignment check */
4304 #if 0
4305         {
4306                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
4307                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
4308                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
4309                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
4310                 amd64_breakpoint (code);
4311         }
4312 #endif
4313
4314         alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
4315         pos = 0;
4316
4317         if (method->save_lmf) {
4318
4319                 pos = ALIGN_TO (pos + sizeof (MonoLMF), 16);
4320
4321                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, pos);
4322
4323                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4324
4325                 /* Save ip */
4326                 amd64_lea_membase (code, AMD64_R11, AMD64_RIP, 0);
4327                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rip), AMD64_R11, 8);
4328                 /* Save fp */
4329                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebp), AMD64_RBP, 8);
4330                 /* Save method */
4331                 /* FIXME: add a relocation for this */
4332                 if (IS_IMM32 (cfg->method))
4333                         amd64_mov_membase_imm (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), (guint64)cfg->method, 8);
4334                 else {
4335                         amd64_mov_reg_imm (code, AMD64_R11, cfg->method);
4336                         amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, method), AMD64_R11, 8);
4337                 }
4338                 /* Save callee saved regs */
4339                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), AMD64_RBX, 8);
4340                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), AMD64_R12, 8);
4341                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), AMD64_R13, 8);
4342                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), AMD64_R14, 8);
4343                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), AMD64_R15, 8);
4344         } else {
4345
4346                 for (i = 0; i < AMD64_NREG; ++i)
4347                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4348                                 amd64_push_reg (code, i);
4349                                 pos += sizeof (gpointer);
4350                         }
4351         }
4352
4353         alloc_size -= pos;
4354
4355         if (alloc_size) {
4356                 /* See mono_emit_stack_alloc */
4357 #ifdef PLATFORM_WIN32
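                 /* Windows commits stack pages on demand through a guard page,
                  * so large frames must touch each 4K page in turn instead of
                  * adjusting %rsp in a single step. */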
4358                 guint32 remaining_size = alloc_size;
4359                 while (remaining_size >= 0x1000) {
4360                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
4361                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
4362                         remaining_size -= 0x1000;
4363                 }
4364                 if (remaining_size)
4365                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
4366 #else
4367                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
4368 #endif
4369         }
4370
4371         /* compute max_offset in order to use short forward jumps */
4372         max_offset = 0;
4373         if (cfg->opt & MONO_OPT_BRANCH) {
4374                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4375                         MonoInst *ins = bb->code;
4376                         bb->max_offset = max_offset;
4377
4378                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4379                                 max_offset += 6;
4380                         /* max alignment for loops */
4381                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4382                                 max_offset += LOOP_ALIGNMENT;
4383
4384                         while (ins) {
4385                                 if (ins->opcode == OP_LABEL)
4386                                         ins->inst_c1 = max_offset;
4387                                 
4388                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
4389                                 ins = ins->next;
4390                         }
4391                 }
4392         }
4393
4394         sig = method->signature;
4395         pos = 0;
4396
4397         cinfo = get_call_info (sig, FALSE);
4398
4399         if (sig->ret->type != MONO_TYPE_VOID) {
4400                 if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
4401                         /* Save volatile arguments to the stack */
4402                         amd64_mov_membase_reg (code, cfg->ret->inst_basereg, cfg->ret->inst_offset, cinfo->ret.reg, 8);
4403                 }
4404         }
4405
4406         /* Keep this in sync with emit_load_volatile_arguments */
4407         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4408                 ArgInfo *ainfo = cinfo->args + i;
4409                 gint32 stack_offset;
4410                 MonoType *arg_type;
4411                 inst = cfg->varinfo [i];
4412
4413                 if (sig->hasthis && (i == 0))
4414                         arg_type = &mono_defaults.object_class->byval_arg;
4415                 else
4416                         arg_type = sig->params [i - sig->hasthis];
4417
4418                 stack_offset = ainfo->offset + ARGS_OFFSET;
4419
4420                 /* Save volatile arguments to the stack */
4421                 if (inst->opcode != OP_REGVAR) {
4422                         switch (ainfo->storage) {
4423                         case ArgInIReg: {
4424                                 guint32 size = 8;
4425
4426                                 /* FIXME: I1 etc */
4427                                 /*
4428                                 if (stack_offset & 0x1)
4429                                         size = 1;
4430                                 else if (stack_offset & 0x2)
4431                                         size = 2;
4432                                 else if (stack_offset & 0x4)
4433                                         size = 4;
4434                                 else
4435                                         size = 8;
4436                                 */
4437                                 amd64_mov_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg, size);
4438                                 break;
4439                         }
4440                         case ArgInFloatSSEReg:
4441                                 amd64_movss_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
4442                                 break;
4443                         case ArgInDoubleSSEReg:
4444                                 amd64_movsd_membase_reg (code, inst->inst_basereg, inst->inst_offset, ainfo->reg);
4445                                 break;
4446                         default:
4447                                 break;
4448                         }
4449                 }
4450
4451                 if (inst->opcode == OP_REGVAR) {
4452                         /* Argument allocated to (non-volatile) register */
4453                         switch (ainfo->storage) {
4454                         case ArgInIReg:
4455                                 amd64_mov_reg_reg (code, inst->dreg, ainfo->reg, 8);
4456                                 break;
4457                         case ArgOnStack:
4458                                 amd64_mov_reg_membase (code, inst->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
4459                                 break;
4460                         default:
4461                                 g_assert_not_reached ();
4462                         }
4463                 }
4464         }
4465
4466         if (method->save_lmf) {
4467                 if (lmf_tls_offset != -1) {
4468                         /* Load the lmf address quickly using the FS segment register */
4469                         x86_prefix (code, X86_FS_PREFIX);
4470                         amd64_mov_reg_mem (code, AMD64_RAX, lmf_tls_offset, 8);
4471                 }
4472                 else {
4473                         /* 
4474                          * The call might clobber argument registers, but they are already
4475                          * saved to the stack/global regs.
4476                          */
4477
4478                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4479                                                                  (gpointer)"mono_get_lmf_addr");                
4480                 }
4481
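                 /* The LMF (Last Managed Frame) is pushed onto a per thread
                  * linked list so the runtime can resume stack walks across
                  * native frames. */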
4482                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4483
4484                 /* Save lmf_addr */
4485                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), AMD64_RAX, 8);
4486                 /* Save previous_lmf */
4487                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RAX, 0, 8);
4488                 amd64_mov_membase_reg (code, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), AMD64_R11, 8);
4489                 /* Set new lmf */
4490                 amd64_lea_membase (code, AMD64_R11, AMD64_RBP, lmf_offset);
4491                 amd64_mov_membase_reg (code, AMD64_RAX, 0, AMD64_R11, 8);
4492         }
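
        /*
         * The sequence emitted above amounts to (with %rax holding lmf_addr on
         * both the TLS and the mono_get_lmf_addr paths):
         *
         *   lmf->lmf_addr     = lmf_addr
         *   lmf->previous_lmf = *lmf_addr
         *   *lmf_addr         = lmf        (push this frame's LMF onto the list)
         */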
4493
4494
4495         g_free (cinfo);
4496
4497         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4498                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4499
4500         cfg->code_len = code - cfg->native_code;
4501
4502         g_assert (cfg->code_len < cfg->code_size);
4503
4504         return code;
4505 }
4506
4507 void
4508 mono_arch_emit_epilog (MonoCompile *cfg)
4509 {
4510         MonoJumpInfo *patch_info;
4511         MonoMethod *method = cfg->method;
4512         int pos, i;
4513         guint8 *code;
4514
4515         code = cfg->native_code + cfg->code_len;
4516
4517         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4518                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4519
4520         /* the code restoring the registers must be kept in sync with CEE_JMP */
4521         pos = 0;
4522         
4523         if (method->save_lmf) {
4524                 gint32 lmf_offset = - cfg->arch.lmf_offset;
4525
4526                 /* Restore previous lmf */
4527                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 8);
4528                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 8);
4529                 amd64_mov_membase_reg (code, AMD64_R11, 0, AMD64_RCX, 8);
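                /* i.e. *lmf->lmf_addr = lmf->previous_lmf, popping this frame's LMF off the list */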
4530
4531                 /* Restore caller saved regs */
4532                 if (cfg->used_int_regs & (1 << AMD64_RBX)) {
4533                         amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, rbx), 8);
4534                 }
4535                 if (cfg->used_int_regs & (1 << AMD64_R12)) {
4536                         amd64_mov_reg_membase (code, AMD64_R12, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r12), 8);
4537                 }
4538                 if (cfg->used_int_regs & (1 << AMD64_R13)) {
4539                         amd64_mov_reg_membase (code, AMD64_R13, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r13), 8);
4540                 }
4541                 if (cfg->used_int_regs & (1 << AMD64_R14)) {
4542                         amd64_mov_reg_membase (code, AMD64_R14, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r14), 8);
4543                 }
4544                 if (cfg->used_int_regs & (1 << AMD64_R15)) {
4545                         amd64_mov_reg_membase (code, AMD64_R15, AMD64_RBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, r15), 8);
4546                 }
4547         } else {
4548
4549                 for (i = 0; i < AMD64_NREG; ++i)
4550                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
4551                                 pos -= sizeof (gpointer);
4552
4553                 if (pos) {
4554                         if (pos == - sizeof (gpointer)) {
4555                                 /* Only one register, so avoid lea */
4556                                 for (i = AMD64_NREG - 1; i > 0; --i)
4557                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4558                                                 amd64_mov_reg_membase (code, i, AMD64_RBP, pos, 8);
4559                                         }
4560                         }
4561                         else {
4562                                 amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);
4563
4564                                 /* Pop registers in reverse order */
4565                                 for (i = AMD64_NREG - 1; i > 0; --i)
4566                                         if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4567                                                 amd64_pop_reg (code, i);
4568                                         }
4569                         }
4570                 }
4571         }
4572
4573         amd64_leave (code);
4574         amd64_ret (code);
4575
4576         /* add code to raise exceptions */
4577         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4578                 switch (patch_info->type) {
4579                 case MONO_PATCH_INFO_EXC: {
4580                         guint64 offset;
4581
4582                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
4583                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
4584                         amd64_set_reg_template (code, AMD64_RDI);
4585                         /* 7 is the length of the lea */
4586                         offset = (((guint64)code + 7) - (guint64)cfg->native_code) - (guint64)patch_info->ip.i;
4587                         amd64_lea_membase (code, AMD64_RSI, AMD64_RIP, - offset);
4588                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4589                         patch_info->data.name = "mono_arch_throw_exception_by_name";
4590                         patch_info->ip.i = code - cfg->native_code;
4591                         EMIT_CALL ();
4592                         break;
4593                 }
4594                 default:
4595                         /* do nothing */
4596                         break;
4597                 }
4598         }
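
        /*
         * Each MONO_PATCH_INFO_EXC handled above thus turns into, roughly:
         *
         *   mov  $<exception name>, %rdi   ; patched via MONO_PATCH_INFO_EXC_NAME
         *   lea  -offset(%rip), %rsi       ; address of the branch that raised it
         *   call mono_arch_throw_exception_by_name
         */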
4599
4600         /* Handle relocations with RIP relative addressing */
4601         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4602                 gboolean remove = FALSE;
4603
4604                 switch (patch_info->type) {
4605                 case MONO_PATCH_INFO_R8: {
4606                         code = (guint8*)ALIGN_TO (code, 8);
4607
4608                         guint8* pos = cfg->native_code + patch_info->ip.i;
4609
4610                         *(double*)code = *(double*)patch_info->data.target;
4611
4612                         *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4613                         code += 8;
4614
4615                         remove = TRUE;
4616                         break;
4617                 }
4618                 case MONO_PATCH_INFO_R4: {
4619                         code = (guint8*)ALIGN_TO (code, 8);
4620
4621                         guint8* pos = cfg->native_code + patch_info->ip.i;
4622
4623                         *(float*)code = *(float*)patch_info->data.target;
4624
4625                         *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
4626                         code += 4;
4627
4628                         remove = TRUE;
4629                         break;
4630                 }
4631                 default:
4632                         break;
4633                 }
4634
4635                 if (remove) {
4636                         if (patch_info == cfg->patch_info)
4637                                 cfg->patch_info = patch_info->next;
4638                         else {
4639                                 MonoJumpInfo *tmp;
4640
4641                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
4642                                         ;
4643                                 tmp->next = patch_info->next;
4644                         }
4645                 }
4646         }
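
        /*
         * The constants are emitted after the method body; patch_info->ip.i is
         * assumed to point at a 7 byte RIP-relative SSE load whose 32 bit
         * displacement starts 3 bytes in, so the patched load resolves to
         * pos + 7 + disp32, i.e. the constant written above.
         */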
4647
4648         cfg->code_len = code - cfg->native_code;
4649
4650         g_assert (cfg->code_len < cfg->code_size);
4651
4652 }
4653
4654 /*
4655  * Allow tracing to work with this interface (with an optional argument)
4656  */
4657
4658 /*
4659  * This may be needed on some archs or for debugging support.
4660  */
4661 void
4662 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
4663 {
4664         /* no stack room needed now (may be needed for FASTCALL-trace support) */
4665         *stack = 0;
4666         /* split prolog-epilog requirements? */
4667         *code = 50; /* max bytes needed: check this number */
4668 }
4669
4670 void*
4671 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4672 {
4673         guchar *code = p;
4674         CallInfo *cinfo;
4675         MonoMethodSignature *sig;
4676         MonoInst *inst;
4677         int i, n, stack_area = 0;
4678
4679         /* Keep this in sync with mono_arch_get_argument_info */
4680
4681         if (enable_arguments) {
4682                 /* Allocate a new area on the stack and save arguments there */
4683                 sig = cfg->method->signature;
4684
4685                 cinfo = get_call_info (sig, FALSE);
4686
4687                 n = sig->param_count + sig->hasthis;
4688
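                /* keep the area 16 byte aligned; the AMD64 ABI requires this at call sites */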
4689                 stack_area = ALIGN_TO (n * 8, 16);
4690
4691                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
4692
4693                 for (i = 0; i < n; ++i) {
4694                         ArgInfo *ainfo = cinfo->args + i;
4695                         gint32 stack_offset;
4696                         MonoType *arg_type;
4697                         inst = cfg->varinfo [i];
4698
4699                         if (sig->hasthis && (i == 0))
4700                                 arg_type = &mono_defaults.object_class->byval_arg;
4701                         else
4702                                 arg_type = sig->params [i - sig->hasthis];
4703
4704                         stack_offset = ainfo->offset + ARGS_OFFSET;
4705
4706                         switch (ainfo->storage) {
4707                         case ArgInIReg:
4708                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg, 8);                                
4709                                 break;
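                        /* movsd stores the low quadword; for R4 arguments the float sits in its low 4 bytes */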
4710                         case ArgInFloatSSEReg:
4711                                 amd64_movsd_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg);
4712                                 break;
4713                         case ArgInDoubleSSEReg:
4714                                 amd64_movsd_membase_reg (code, AMD64_RSP, (i * 8), ainfo->reg);
4715                                 break;
4716                         case ArgOnStack:
4717                                 /* Copy from original stack location to the argument area */
4718                                 /* FIXME: valuetypes etc */
4719                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
4720                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
4721                                 break;
4722                         default:
4723                                 g_assert_not_reached ();
4724                         }
4725                 }
4726         }
4727
4728         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4729         amd64_set_reg_template (code, AMD64_RDI);
4730         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RSP, 8);
4731         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4732
4733         if (enable_arguments) {
4734                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
4735
4736                 g_free (cinfo);
4737         }
4738
4739         return code;
4740 }
4741
4742 enum {
4743         SAVE_NONE,
4744         SAVE_STRUCT,
4745         SAVE_EAX,
4746         SAVE_EAX_EDX,
4747         SAVE_XMM
4748 };
4749
4750 void*
4751 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4752 {
4753         guchar *code = p;
4754         int save_mode = SAVE_NONE;
4755         MonoMethod *method = cfg->method;
4756         int rtype = method->signature->ret->type;
4757
4758 handle_enum:
4759         switch (rtype) {
4760         case MONO_TYPE_VOID:
4761                 /* special case string .ctor icall */
4762                 if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
4763                         save_mode = SAVE_EAX;
4764                 else
4765                         save_mode = SAVE_NONE;
4766                 break;
4767         case MONO_TYPE_I8:
4768         case MONO_TYPE_U8:
4769                 save_mode = SAVE_EAX;
4770                 break;
4771         case MONO_TYPE_R4:
4772         case MONO_TYPE_R8:
4773                 save_mode = SAVE_XMM;
4774                 break;
4775         case MONO_TYPE_VALUETYPE:
4776                 if (method->signature->ret->data.klass->enumtype) {
4777                         rtype = method->signature->ret->data.klass->enum_basetype->type;
4778                         goto handle_enum;
4779                 }
4780                 save_mode = SAVE_STRUCT;
4781                 break;
4782         default:
4783                 save_mode = SAVE_EAX;
4784                 break;
4785         }
4786
4787         /* Save the result and copy it into the proper argument register */
4788         switch (save_mode) {
4789         case SAVE_EAX:
4790                 amd64_push_reg (code, AMD64_RAX);
4791                 /* Align stack */
4792                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4793                 if (enable_arguments)
4794                         amd64_mov_reg_reg (code, AMD64_RSI, AMD64_RAX, 8);
4795                 break;
4796         case SAVE_STRUCT:
4797                 /* FIXME: */
4798                 if (enable_arguments)
4799                         amd64_mov_reg_imm (code, AMD64_RSI, 0);
4800                 break;
4801         case SAVE_XMM:
4802                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4803                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
4804                 /* Align stack */
4805                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4806                 /* 
4807                  * The result is already in the proper argument register so no copying
4808                  * needed.
4809                  */
4810                 break;
4811         case SAVE_NONE:
4812                 break;
4813         default:
4814                 g_assert_not_reached ();
4815         }
4816
4817         /* Set %al to the number of SSE registers used, since this is a varargs call */
4818         if (save_mode == SAVE_XMM)
4819                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
4820         else
4821                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
4822
4823         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
4824         amd64_set_reg_template (code, AMD64_RDI);
4825         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4826
4827         /* Restore result */
4828         switch (save_mode) {
4829         case SAVE_EAX:
4830                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4831                 amd64_pop_reg (code, AMD64_RAX);
4832                 break;
4833         case SAVE_STRUCT:
4834                 /* FIXME: */
4835                 break;
4836         case SAVE_XMM:
4837                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4838                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
4839                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
4840                 break;
4841         case SAVE_NONE:
4842                 break;
4843         default:
4844                 g_assert_not_reached ();
4845         }
4846
4847         return code;
4848 }
4849
4850 int
4851 mono_arch_max_epilog_size (MonoCompile *cfg)
4852 {
4853         int max_epilog_size = 16;
4854         MonoJumpInfo *patch_info;
4855         
4856         if (cfg->method->save_lmf)
4857                 max_epilog_size += 256;
4858         
4859         if (mono_jit_trace_calls != NULL)
4860                 max_epilog_size += 50;
4861
4862         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4863                 max_epilog_size += 50;
4864
4865         max_epilog_size += (AMD64_NREG * 2);
4866
4867         /* 
4868          * make sure we have enough space for exceptions
4869          */
4870         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4871                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4872                         max_epilog_size += 40;
4873                 if (patch_info->type == MONO_PATCH_INFO_R8)
4874                         max_epilog_size += 8 + 7; /* sizeof (double) + alignment */
4875                 if (patch_info->type == MONO_PATCH_INFO_R4)
4876                         max_epilog_size += 4 + 7; /* sizeof (float) + alignment */
4877         }
4878
4879         return max_epilog_size;
4880 }
4881
4882 void
4883 mono_arch_flush_icache (guint8 *code, gint size)
4884 {
4885         /* not needed */
4886 }
4887
4888 void
4889 mono_arch_flush_register_windows (void)
4890 {
4891 }
4892
4893 gboolean 
4894 mono_arch_is_inst_imm (gint64 imm)
4895 {
4896         return amd64_is_imm32 (imm);
4897 }
4898
4899 #define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
4900
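/* Maps the hardware register numbers used by the AMD64_* constants (plus RIP) to ucontext gregs indices */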
4901 static int reg_to_ucontext_reg [] = {
4902         REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
4903         REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
4904         REG_RIP
4905 };
4906
4907 /*
4908  * Determine whether the trap whose info is in SIGINFO is caused by
4909  * integer overflow (INT64_MIN / -1 raises the same #DE trap as division by zero).
4910  */
4911 gboolean
4912 mono_arch_is_int_overflow (void *sigctx)
4913 {
4914         ucontext_t *ctx = (ucontext_t*)sigctx;
4915         guint8* rip;
4916         int reg;
4917
4918         rip = (guint8*)ctx->uc_mcontext.gregs [REG_RIP];
4919
4920         if (IS_REX (rip [0])) {
4921                 reg = amd64_rex_r (rip [0]);
4922                 rip ++;
4923         }
4924         else
4925                 reg = 0;
4926
4927         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
4928                 /* idiv REG */
4929                 reg += x86_modrm_rm (rip [1]);
4930
4931                 if (ctx->uc_mcontext.gregs [reg_to_ucontext_reg [reg]] == -1)
4932                         return TRUE;
4933         }
4934
4935         return FALSE;
4936 }
4937
4938 gpointer*
4939 mono_amd64_get_vcall_slot_addr (guint8* code, guint64 *regs)
4940 {
4941         guint32 reg;
4942         guint32 disp;
4943         guint8 rex = 0;
4944
4945         /* go to the start of the call instruction
4946          *
4947          * address_byte = (m << 6) | (o << 3) | reg
4948          * call opcode: 0xff address_byte displacement
4949          * 0xff m=1,o=2 imm8
4950          * 0xff m=2,o=2 imm32
4951          */
4952         code -= 6;
4953
4954         if (IS_REX (code [3]) && (code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x3)) {
4955                 /* call *%reg */
4956                 return NULL;
4957         }
4958         else if ((code [0] == 0xff) && (amd64_modrm_reg (code [1]) == 0x2) && (amd64_modrm_mod (code [1]) == 0x2)) {
4959                 /* call *[reg+disp32] */
4960                 reg = amd64_modrm_rm (code [1]);
4961                 disp = *(guint32*)(code + 2);
4962                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4963         }
4964         else if ((code [3] == 0xff) && (amd64_modrm_reg (code [4]) == 0x2) && (amd64_modrm_mod (code [4]) == 0x1)) {
4965                 /* call *[reg+disp8] */
4966                 reg = amd64_modrm_rm (code [4]);
4967                 disp = *(guint8*)(code + 5);
4968                 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
4969         }
4970         else if ((code [4] == 0xff) && (amd64_modrm_reg (code [5]) == 0x2) && (amd64_modrm_mod (code [5]) == 0x0)) {
4971                 /*
4972                  * This is an interface call; the cases above should not be able to catch it earlier:
4973                  * 8b 40 30   mov    0x30(%eax),%eax
4974                  * ff 10      call   *(%eax)
4975                  */
4976                 reg = amd64_modrm_rm (code [5]);
4977                 disp = 0;
4978         }
4979         else
4980                 g_assert_not_reached ();
4981
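        /* Note: rex is never assigned above, so this is currently a no-op for the extended registers */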
4982         reg += amd64_rex_b (rex);
4983
4984         /* FIXME: */
4985         return (gpointer)((regs [reg]) + disp);
4986 }
4987
4988 /*
4989  * Support for fast access to the thread-local lmf structure using the FS
4990  * segment register on NPTL + kernel 2.6.x.
4991  */
4992
4993 static gboolean tls_offset_inited = FALSE;
4994
4995 /* code should be simply return <tls var>; */
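/*
 * A sketch of the kind of accessor being pattern-matched below, under the
 * assumption that it is a plain TLS load (mono_lmf_addr here is only an
 * illustrative variable name):
 *
 *   static __thread gpointer mono_lmf_addr;
 *   gpointer mono_get_lmf_addr (void) { return mono_lmf_addr; }
 *
 * which gcc compiles to "mov %fs:0x0, %rax; mov <offset>(%rax), %rax; retq";
 * the byte patterns below extract <offset> from such code.
 */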
4996 static int 
4997 read_tls_offset_from_method (void* method)
4998 {
4999         guint8 *code = (guint8*)method;
5000
5001         /* 
5002          * Determine the offset of mono_lmf_addr inside the TLS structures
5003          * by disassembling the function passed in.
5004          */
5005         /* This is generated by gcc 3.3.2 */
5006         if ((code [0] == 0x55) && (code [1] == 0x48) && (code [2] == 0x89) &&
5007                 (code [3] == 0xe5) && (code [4] == 0x64) && (code [5] == 0x48) &&
5008                 (code [6] == 0x8b) && (code [7] == 0x04) && (code [8] == 0x25) &&
5009                 (code [9] == 0x00) && (code [10] == 0x00) && (code [11] == 0x00) &&
5010                 (code [12] == 0x0) && (code [13] == 0x48) && (code [14] == 0x8b) &&
5011                 (code [15] == 0x80)) {
5012                 return *(gint32*)&(code [16]);
5013         } else if
5014                 /* This is generated by gcc-3.3.2 with -O=2 */
5015                 /* mov fs:0, %rax ; mov <offset>(%rax), %rax ; retq */
5016                 ((code [0] == 0x64) && (code [1] == 0x48) && (code [2] == 0x8b) &&
5017                  (code [3] == 0x04) && (code [4] == 0x25) &&
5018                  (code [9] == 0x48) && (code [10] == 0x8b) && (code [11] == 0x80) &&
5019                  (code [16] == 0xc3)) {
5020                         return *(gint32*)&(code [12]);
5021         } else if 
5022                 /* This is generated by gcc-3.4.1 */
5023                 ((code [0] == 0x55) && (code [1] == 0x48) && (code [2] == 0x89) &&
5024                  (code [3] == 0xe5) && (code [4] == 0x64) && (code [5] == 0x48) &&
5025                  (code [6] == 0x8b) && (code [7] == 0x04) && (code [8] == 0x25) &&
5026                  (code [13] == 0xc9) && (code [14] == 0xc3)) {
5027                         return *(gint32*)&(code [9]);
5028         } else if
5029                 /* This is generated by gcc-3.4.1 with -O=2 */
5030                 ((code [0] == 0x64) && (code [1] == 0x48) && (code [2] == 0x8b) &&
5031                  (code [3] == 0x04) && (code [4] == 0x25)) {
5032                 return *(gint32*)&(code [5]);
5033         }
5034
5035         return -1;
5036 }
5037
5038 void
5039 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
5040 {
5041 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5042         pthread_t self = pthread_self();
5043         pthread_attr_t attr;
5044         void *staddr = NULL;
5045         size_t stsize = 0;
5046         struct sigaltstack sa;
5047 #endif
5048
5049         if (!tls_offset_inited) {
5050                 tls_offset_inited = TRUE;
5051
5052                 lmf_tls_offset = read_tls_offset_from_method (mono_get_lmf_addr);
5053                 appdomain_tls_offset = read_tls_offset_from_method (mono_domain_get);
5054                 //thread_tls_offset = read_tls_offset_from_method (mono_thread_current);
5055         }               
5056
5057 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5058
5059         /* Determine stack boundaries */
5060         if (!mono_running_on_valgrind ()) {
5061 #ifdef HAVE_PTHREAD_GETATTR_NP
5062                 pthread_getattr_np( self, &attr );
5063 #else
5064 #ifdef HAVE_PTHREAD_ATTR_GET_NP
5065                 pthread_attr_get_np( self, &attr );
5066 #elif defined(sun)
5067                 pthread_attr_init( &attr );
5068                 pthread_attr_getstacksize( &attr, &stsize );
5069 #else
5070 #error "Not implemented"
5071 #endif
5072 #endif
5073 #ifndef sun
5074                 pthread_attr_getstack( &attr, &staddr, &stsize );
5075 #endif
5076         }
5077
5078         /* 
5079          * staddr seems to be wrong for the main thread, so we keep the value in
5080          * tls->end_of_stack
5081          */
5082         tls->stack_size = stsize;
5083
5084         /* Setup an alternate signal stack */
5085         tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
5086         tls->signal_stack_size = SIGNAL_STACK_SIZE;
5087
5088         sa.ss_sp = tls->signal_stack;
5089         sa.ss_size = SIGNAL_STACK_SIZE;
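        /* POSIX wants ss_flags to be 0 when installing a stack; Linux also accepts SS_ONSTACK, treating it like 0 */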
5090         sa.ss_flags = SS_ONSTACK;
5091         sigaltstack (&sa, NULL);
5092 #endif
5093 }
5094
5095 void
5096 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
5097 {
5098 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
5099         struct sigaltstack sa;
5100
5101         sa.ss_sp = tls->signal_stack;
5102         sa.ss_size = SIGNAL_STACK_SIZE;
5103         sa.ss_flags = SS_DISABLE;
5104         sigaltstack  (&sa, NULL);
5105
5106         if (tls->signal_stack)
5107                 g_free (tls->signal_stack);
5108 #endif
5109 }
5110
5111 void
5112 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
5113 {
5114         int out_reg = param_regs [0];
5115
5116         /* FIXME: RDI and RSI might get clobbered */
5117
5118         if (vt_reg != -1) {
5119                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
5120                 MonoInst *vtarg;
5121
5122                 if (cinfo->ret.storage == ArgValuetypeInReg) {
5123                         /*
5124                          * The valuetype is in RAX:RDX after the call and needs to be copied to
5125                          * the stack. Push the address here so the call instruction can
5126                          * access it.
5127                          */
5128                         MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
5129                         vtarg->sreg1 = vt_reg;
5130                         mono_bblock_add_inst (cfg->cbb, vtarg);
5131
5132                         /* Align stack */
5133                         MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
5134                 }
5135                 else {
5136                         MONO_INST_NEW (cfg, vtarg, OP_SETREG);
5137                         vtarg->sreg1 = vt_reg;
5138                         vtarg->dreg = out_reg;
5139                         out_reg = param_regs [1];
5140                         mono_bblock_add_inst (cfg->cbb, vtarg);
5141                 }
5142
5143                 g_free (cinfo);
5144         }
5145
5146         /* add the this argument */
5147         if (this_reg != -1) {
5148                 MonoInst *this;
5149                 MONO_INST_NEW (cfg, this, OP_SETREG);
5150                 this->type = this_type;
5151                 this->sreg1 = this_reg;
5152                 this->dreg = out_reg;
5153                 mono_bblock_add_inst (cfg->cbb, this);
5154         }
5155 }
5156
5157 gint
5158 mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5159 {
5160         if (cmethod->klass == mono_defaults.math_class) {
5161                 if (strcmp (cmethod->name, "Sin") == 0)
5162                         return OP_SIN;
5163                 else if (strcmp (cmethod->name, "Cos") == 0)
5164                         return OP_COS;
5165                 else if (strcmp (cmethod->name, "Tan") == 0)
5166                         return OP_TAN;
5167                 else if (strcmp (cmethod->name, "Atan") == 0)
5168                         return OP_ATAN;
5169                 else if (strcmp (cmethod->name, "Sqrt") == 0)
5170                         return OP_SQRT;
5171                 else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8)
5172                         return OP_ABS;
5173 #if 0
5174                 /* OP_FREM is not IEEE compatible */
5175                 else if (strcmp (cmethod->name, "IEEERemainder") == 0)
5176                         return OP_FREM;
5177 #endif
5178                 else
5179                         return -1;
5180         } else {
5181                 return -1;
5182         }
5184 }
5185
5186
5187 gboolean
5188 mono_arch_print_tree (MonoInst *tree, int arity)
5189 {
5190         return 0;
5191 }
5192
5193 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5194 {
5195         MonoInst* ins;
5196         
5197         if (appdomain_tls_offset == -1)
5198                 return NULL;
5199         
5200         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
5201         ins->inst_offset = appdomain_tls_offset;
5202         return ins;
5203 }
5204
5205 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5206 {
5207         MonoInst* ins;
5208         
5209         if (thread_tls_offset == -1)
5210                 return NULL;
5211         
5212         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
5213         ins->inst_offset = thread_tls_offset;
5214         return ins;
5215 }