/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *   Zoltan Varga (vargaz@gmail.com)
 *
 * (C) 2003 Ximian, Inc.
 * Copyright 2003-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/abi-details.h>
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/metadata/gc-internals.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>
#include <mono/utils/mono-tls.h>
#include <mono/utils/mono-hwcap-x86.h>
#include <mono/utils/mono-threads.h>

#include "trace.h"
#include "ir-emit.h"
#include "mini-amd64.h"
#include "cpu-amd64.h"
#include "debugger-agent.h"
#include "mini-gc.h"

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))

#ifdef TARGET_WIN32
/* Under Windows, the calling convention is never stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (FALSE)
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() mono_os_mutex_lock (&mini_arch_mutex)
#define mono_mini_arch_unlock() mono_os_mutex_unlock (&mini_arch_mutex)
static mono_mutex_t mini_arch_mutex;

/* The single step trampoline */
static gpointer ss_trampoline;

/* The breakpoint trampoline */
static gpointer bp_trampoline;

/* Offset between fp and the first argument in the callee */
#define ARGS_OFFSET 16
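
/*
 * For illustration: with a standard prologue (push %rbp; mov %rsp, %rbp),
 * the frame at the start of the body is laid out as
 *
 *   %rbp + 16   first stack-passed argument   <- ARGS_OFFSET
 *   %rbp + 8    return address
 *   %rbp + 0    saved %rbp
 *
 * i.e. ARGS_OFFSET skips the saved frame pointer and the return address.
 */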
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * UNORDERED        1  1  1
 */
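
/*
 * For example, after 'ucomisd %xmm1, %xmm0' (A in %xmm0, B in %xmm1) the
 * unordered case can be filtered out first with a parity check, since PF
 * is only set when an operand is NaN:
 *
 *   ucomisd %xmm1, %xmm0
 *   jp      unordered     ; PF=1 -> NaN operand
 *   ja      greater       ; CF=0 and ZF=0 -> A > B
 */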

const char*
mono_arch_regname (int reg)
{
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static const char * packed_xmmregs [] = {
        "p:xmm0", "p:xmm1", "p:xmm2", "p:xmm3", "p:xmm4", "p:xmm5", "p:xmm6", "p:xmm7", "p:xmm8",
        "p:xmm9", "p:xmm10", "p:xmm11", "p:xmm12", "p:xmm13", "p:xmm14", "p:xmm15"
};

static const char * single_xmmregs [] = {
        "s:xmm0", "s:xmm1", "s:xmm2", "s:xmm3", "s:xmm4", "s:xmm5", "s:xmm6", "s:xmm7", "s:xmm8",
        "s:xmm9", "s:xmm10", "s:xmm11", "s:xmm12", "s:xmm13", "s:xmm14", "s:xmm15"
};

const char*
mono_arch_fregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return single_xmmregs [reg];
        else
                return "unknown";
}

const char *
mono_arch_xregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return packed_xmmregs [reg];
        else
                return "unknown";
}

static gboolean
debug_omit_fp (void)
{
#if 0
        return mono_debug_count ();
#else
        return TRUE;
#endif
}

static inline gboolean
amd64_is_near_call (guint8 *code)
{
        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        return code [0] == 0xe8;
}

static inline gboolean
amd64_use_imm32 (gint64 val)
{
        if (mini_get_debug_options()->single_imm_size)
                return FALSE;

        return amd64_is_imm32 (val);
}

static void
amd64_patch (unsigned char* code, gpointer target)
{
        guint8 rex = 0;

        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f)) {
                rex = code [0];
                code += 1;
        }

        if ((code [0] & 0xf8) == 0xb8) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) {
                /* mov 0(%rip), %dreg */
                *(guint32*)(code + 2) = (guint32)(guint64)target - 7;
        }
        else if ((code [0] == 0xff) && (code [1] == 0x15)) {
                /* call *<OFFSET>(%rip) */
                *(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
        }
        else if (code [0] == 0xe8) {
                /* call <DISP> */
                gint64 disp = (guint8*)target - (guint8*)code;
                g_assert (amd64_is_imm32 (disp));
                x86_patch (code, (unsigned char*)target);
        }
        else
                x86_patch (code, (unsigned char*)target);
}
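
/*
 * A sketch of the near-call case above: 'e8 <disp32>' encodes a displacement
 * relative to the end of the 5 byte instruction, so the patched value is
 * disp = target - (code + 5). E.g. a call emitted at 0x1000 and retargeted
 * to 0x2000 ends up with a disp32 of 0x2000 - 0x1005 = 0xffb.
 */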

void
mono_amd64_patch (unsigned char* code, gpointer target)
{
        amd64_patch (code, target);
}

#define DEBUG(a) if (cfg->verbose_level > 1) a

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                ainfo->arg_size = sizeof (mgreg_t);
                /* Since the same stack slot size is used for all arg */
                /*  types, it needs to be big enough to hold them all */
                (*stack_size) += sizeof(mgreg_t);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}
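
/*
 * On SysV AMD64, param_regs is { RDI, RSI, RDX, RCX, R8, R9 } and PARAM_REGS
 * is 6 (on Windows: RCX, RDX, R8, R9 and 4), so for instance the third
 * integer argument of a call lands in %rdx and the seventh is the first one
 * to fall back to ArgOnStack.
 */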

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                ainfo->arg_size = sizeof (mgreg_t);
                /* Since the same stack slot size is used for both float */
                /*  types, it needs to be big enough to hold them both */
                (*stack_size) += sizeof(mgreg_t);
        }
        else {
                /* A double register */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2 = ARG_CLASS_NO_CLASS;
        MonoType *ptype;

        ptype = mini_get_underlying_type (type);
        switch (ptype->type) {
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
#ifdef TARGET_WIN32
                class2 = ARG_CLASS_INTEGER;
#else
                class2 = ARG_CLASS_SSE;
#endif
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (ptype)) {
                        class2 = ARG_CLASS_INTEGER;
                        break;
                }
                /* fall through */
        case MONO_TYPE_VALUETYPE: {
                MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
                int i;

                for (i = 0; i < info->num_fields; ++i) {
                        class2 = class1;
                        class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                }
                break;
        }
        default:
                g_assert_not_reached ();
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}
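
/*
 * A small worked example of the merge rules above: for
 *
 *   struct { double d; long l; }
 *
 * the first eightbyte merges NO_CLASS with SSE (from 'd') and the second
 * merges NO_CLASS with INTEGER (from 'l'), so the struct travels in one XMM
 * and one integer register. If a double and a long shared one eightbyte,
 * merging SSE with INTEGER would yield INTEGER for that eightbyte instead.
 */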

static int
count_fields_nested (MonoClass *klass)
{
        MonoMarshalType *info;
        int i, count;

        info = mono_marshal_load_type_info (klass);
        g_assert (info);
        count = 0;
        for (i = 0; i < info->num_fields; ++i) {
                if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type))
                        count += count_fields_nested (mono_class_from_mono_type (info->fields [i].field->type));
                else
                        count ++;
        }
        return count;
}

static int
collect_field_info_nested (MonoClass *klass, MonoMarshalField *fields, int index, int offset)
{
        MonoMarshalType *info;
        int i;

        info = mono_marshal_load_type_info (klass);
        g_assert (info);
        for (i = 0; i < info->num_fields; ++i) {
                if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type)) {
                        index = collect_field_info_nested (mono_class_from_mono_type (info->fields [i].field->type), fields, index, info->fields [i].offset);
                } else {
                        memcpy (&fields [index], &info->fields [i], sizeof (MonoMarshalField));
                        fields [index].offset += offset;
                        index ++;
                }
        }
        return index;
}
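
/*
 * For illustration, flattening
 *
 *   struct Inner { int a; int b; };
 *   struct Outer { struct Inner i; float f; };
 *
 * yields three leaf entries (a, b, f) with offsets rewritten to 0, 4 and 8
 * relative to Outer, which is the shape the per-eightbyte classification
 * below operates on.
 */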

#ifdef TARGET_WIN32
static void
add_valuetype_win64 (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                                         gboolean is_return,
                                         guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, i, nfields;
        guint32 argsize = 8;
        ArgumentClass arg_class;
        MonoMarshalType *info = NULL;
        MonoMarshalField *fields = NULL;
        MonoClass *klass;
        gboolean pass_on_stack = FALSE;

        klass = mono_class_from_mono_type (type);
        size = mini_type_stack_size_full (&klass->byval_arg, NULL, sig->pinvoke);
        if (!sig->pinvoke)
                pass_on_stack = TRUE;

        /* If this struct can't be split up naturally into 8-byte */
        /* chunks (registers), pass it on the stack.              */
        if (sig->pinvoke && !pass_on_stack) {
                guint32 align;
                guint32 field_size;

                info = mono_marshal_load_type_info (klass);
                g_assert (info);

                /*
                 * Collect field information recursively to be able to
                 * handle nested structures.
                 */
                nfields = count_fields_nested (klass);
                fields = g_new0 (MonoMarshalField, nfields);
                collect_field_info_nested (klass, fields, 0, 0);

                for (i = 0; i < nfields; ++i) {
                        field_size = mono_marshal_type_size (fields [i].field->type,
                                                           fields [i].mspec,
                                                           &align, TRUE, klass->unicode);
                        if ((fields [i].offset < 8) && (fields [i].offset + field_size) > 8) {
                                pass_on_stack = TRUE;
                                break;
                        }
                }
        }

        if (pass_on_stack) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                if (!is_return)
                        ainfo->arg_size = ALIGN_TO (size, 8);

                g_free (fields);
                return;
        }

        if (!sig->pinvoke) {
                int n = mono_class_value_size (klass, NULL);

                argsize = n;

                if (n > 8)
                        arg_class = ARG_CLASS_MEMORY;
                else
                        /* Always pass in 1 integer register */
                        arg_class = ARG_CLASS_INTEGER;
        } else {
                g_assert (info);

                if (!fields) {
                        ainfo->storage = ArgValuetypeInReg;
                        ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                        return;
                }

                switch (info->native_size) {
                case 1: case 2: case 4: case 8:
                        break;
                default:
                        if (is_return) {
                                ainfo->storage = ArgValuetypeAddrInIReg;
                                ainfo->offset = *stack_size;
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        }
                        else {
                                ainfo->storage = ArgValuetypeAddrInIReg;

                                if (*gr < PARAM_REGS) {
                                        ainfo->pair_storage [0] = ArgInIReg;
                                        ainfo->pair_regs [0] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                else {
                                        ainfo->pair_storage [0] = ArgOnStack;
                                        ainfo->offset = *stack_size;
                                        ainfo->arg_size = sizeof (mgreg_t);
                                        *stack_size += 8;
                                }
                        }

                        g_free (fields);
                        return;
                }

                int size;
                guint32 align;
                ArgumentClass class1;

                if (nfields == 0)
                        class1 = ARG_CLASS_MEMORY;
                else
                        class1 = ARG_CLASS_NO_CLASS;
                for (i = 0; i < nfields; ++i) {
                        size = mono_marshal_type_size (fields [i].field->type,
                                                                                   fields [i].mspec,
                                                                                   &align, TRUE, klass->unicode);
                        /* How far into this quad this data extends. */
                        /* (8 is size of quad) */
                        argsize = fields [i].offset + size;

                        class1 = merge_argument_class_from_type (fields [i].field->type, class1);
                }
                g_assert (class1 != ARG_CLASS_NO_CLASS);
                arg_class = class1;
        }

        g_free (fields);

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                while (argsize != 1 && argsize != 2 && argsize != 4 && argsize != 8)
                        argsize ++;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                ainfo->pair_size [0] = argsize;
                ainfo->pair_size [1] = 0;
                ainfo->nregs = 1;
                switch (arg_class) {
                case ARG_CLASS_INTEGER:
                        if (*gr >= PARAM_REGS)
                                arg_class = ARG_CLASS_MEMORY;
                        else {
                                ainfo->pair_storage [0] = ArgInIReg;
                                if (is_return)
                                        ainfo->pair_regs [0] = return_regs [*gr];
                                else
                                        ainfo->pair_regs [0] = param_regs [*gr];
                                (*gr) ++;
                        }
                        break;
                case ARG_CLASS_SSE:
                        if (*fr >= FLOAT_PARAM_REGS)
                                arg_class = ARG_CLASS_MEMORY;
                        else {
                                if (argsize <= 4)
                                        ainfo->pair_storage [0] = ArgInFloatSSEReg;
                                else
                                        ainfo->pair_storage [0] = ArgInDoubleSSEReg;
                                ainfo->pair_regs [0] = *fr;
                                (*fr) ++;
                        }
                        break;
                case ARG_CLASS_MEMORY:
                        break;
                default:
                        g_assert_not_reached ();
                }

                if (arg_class == ARG_CLASS_MEMORY) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        *stack_size += sizeof (mgreg_t);
                        ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                        if (!is_return)
                                ainfo->arg_size = sizeof (mgreg_t);
                }
        }
}
#endif /* TARGET_WIN32 */

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                           gboolean is_return,
                           guint32 *gr, guint32 *fr, guint32 *stack_size)
{
#ifdef TARGET_WIN32
        add_valuetype_win64 (sig, ainfo, type, is_return, gr, fr, stack_size);
#else
        guint32 size, quad, nquads, i, nfields;
        /* Keep track of the size used in each quad so we can */
        /* use the right size when copying args/return vars.  */
        guint32 quadsize [2] = {8, 8};
        ArgumentClass args [2];
        MonoMarshalType *info = NULL;
        MonoMarshalField *fields = NULL;
        MonoClass *klass;
        gboolean pass_on_stack = FALSE;

        klass = mono_class_from_mono_type (type);
        size = mini_type_stack_size_full (&klass->byval_arg, NULL, sig->pinvoke);
        if (!sig->pinvoke && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
                /* We pass and return vtypes of size 8 in a register */
        } else if (!sig->pinvoke || (size == 0) || (size > 16)) {
                pass_on_stack = TRUE;
        }

        /* If this struct can't be split up naturally into 8-byte */
        /* chunks (registers), pass it on the stack.              */
        if (sig->pinvoke && !pass_on_stack) {
                guint32 align;
                guint32 field_size;

                info = mono_marshal_load_type_info (klass);
                g_assert (info);

                /*
                 * Collect field information recursively to be able to
                 * handle nested structures.
                 */
                nfields = count_fields_nested (klass);
                fields = g_new0 (MonoMarshalField, nfields);
                collect_field_info_nested (klass, fields, 0, 0);

                for (i = 0; i < nfields; ++i) {
                        field_size = mono_marshal_type_size (fields [i].field->type,
                                                           fields [i].mspec,
                                                           &align, TRUE, klass->unicode);
                        if ((fields [i].offset < 8) && (fields [i].offset + field_size) > 8) {
                                pass_on_stack = TRUE;
                                break;
                        }
                }
        }

        if (size == 0) {
                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                return;
        }

        if (pass_on_stack) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                if (!is_return)
                        ainfo->arg_size = ALIGN_TO (size, 8);

                g_free (fields);
                return;
        }

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        if (!sig->pinvoke) {
                int n = mono_class_value_size (klass, NULL);

                quadsize [0] = n >= 8 ? 8 : n;
                quadsize [1] = n >= 8 ? MAX (n - 8, 8) : 0;

                /* Always pass in 1 or 2 integer registers */
                args [0] = ARG_CLASS_INTEGER;
                args [1] = ARG_CLASS_INTEGER;
                /* Only the simplest cases are supported */
                if (is_return && nquads != 1) {
                        args [0] = ARG_CLASS_MEMORY;
                        args [1] = ARG_CLASS_MEMORY;
                }
        } else {
                /*
                 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
                 * The X87 and SSEUP stuff is left out since there are no such types in
                 * the CLR.
                 */
                g_assert (info);

                if (!fields) {
                        ainfo->storage = ArgValuetypeInReg;
                        ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                        return;
                }

                if (info->native_size > 16) {
                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                        if (!is_return)
                                ainfo->arg_size = ALIGN_TO (info->native_size, 8);

                        g_free (fields);
                        return;
                }

                args [0] = ARG_CLASS_NO_CLASS;
                args [1] = ARG_CLASS_NO_CLASS;
                for (quad = 0; quad < nquads; ++quad) {
                        int size;
                        guint32 align;
                        ArgumentClass class1;

                        if (nfields == 0)
                                class1 = ARG_CLASS_MEMORY;
                        else
                                class1 = ARG_CLASS_NO_CLASS;
                        for (i = 0; i < nfields; ++i) {
                                size = mono_marshal_type_size (fields [i].field->type,
                                                                                           fields [i].mspec,
                                                                                           &align, TRUE, klass->unicode);
                                if ((fields [i].offset < 8) && (fields [i].offset + size) > 8) {
                                        /* Unaligned field */
                                        NOT_IMPLEMENTED;
                                }

                                /* Skip fields in other quad */
                                if ((quad == 0) && (fields [i].offset >= 8))
                                        continue;
                                if ((quad == 1) && (fields [i].offset < 8))
                                        continue;

                                /* How far into this quad this data extends. */
                                /* (8 is size of quad) */
                                quadsize [quad] = fields [i].offset + size - (quad * 8);

                                class1 = merge_argument_class_from_type (fields [i].field->type, class1);
                        }
                        g_assert (class1 != ARG_CLASS_NO_CLASS);
                        args [quad] = class1;
                }
        }

        g_free (fields);

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                while (quadsize [0] != 1 && quadsize [0] != 2 && quadsize [0] != 4 && quadsize [0] != 8)
                        quadsize [0] ++;
                while (quadsize [1] != 0 && quadsize [1] != 1 && quadsize [1] != 2 && quadsize [1] != 4 && quadsize [1] != 8)
                        quadsize [1] ++;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                g_assert (quadsize [0] <= 8);
                g_assert (quadsize [1] <= 8);
                ainfo->pair_size [0] = quadsize [0];
                ainfo->pair_size [1] = quadsize [1];
                ainfo->nregs = nquads;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        if (quadsize [quad] <= 4)
                                                ainfo->pair_storage [quad] = ArgInFloatSSEReg;
                                        else
                                                ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        int arg_size;
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        if (sig->pinvoke)
                                arg_size = ALIGN_TO (info->native_size, 8);
                        else
                                arg_size = nquads * sizeof(mgreg_t);
                        *stack_size += arg_size;
                        ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                        if (!is_return)
                                ainfo->arg_size = arg_size;
                }
        }
#endif /* !TARGET_WIN32 */
}
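
/*
 * Worked example for the SysV path above (pinvoke case):
 *
 *   struct { float a; float b; int c; }   -> 12 bytes, nquads = 2
 *
 * quad 0 covers 'a' and 'b' and classifies as SSE (quadsize 8), quad 1
 * covers 'c' and classifies as INTEGER (quadsize 4), so pair_storage ends
 * up as { ArgInDoubleSSEReg, ArgInIReg }: both floats travel together in
 * one XMM register and the int in a general purpose register.
 */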

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
{
        guint32 i, gr, fr, pstart;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;
        gboolean is_pinvoke = sig->pinvoke;

        if (mp)
                cinfo = (CallInfo *)mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
        else
                cinfo = (CallInfo *)g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        cinfo->nargs = n;
        cinfo->gsharedvt = mini_is_gsharedvt_variable_signature (sig);

        gr = 0;
        fr = 0;

#ifdef TARGET_WIN32
        /* Reserve space where the callee can save the argument registers */
        stack_size = 4 * sizeof (mgreg_t);
#endif

        /* return value */
        ret_type = mini_get_underlying_type (sig->ret);
        switch (ret_type->type) {
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_STRING:
                cinfo->ret.storage = ArgInIReg;
                cinfo->ret.reg = AMD64_RAX;
                break;
        case MONO_TYPE_U8:
        case MONO_TYPE_I8:
                cinfo->ret.storage = ArgInIReg;
                cinfo->ret.reg = AMD64_RAX;
                break;
        case MONO_TYPE_R4:
                cinfo->ret.storage = ArgInFloatSSEReg;
                cinfo->ret.reg = AMD64_XMM0;
                break;
        case MONO_TYPE_R8:
                cinfo->ret.storage = ArgInDoubleSSEReg;
                cinfo->ret.reg = AMD64_XMM0;
                break;
        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (ret_type)) {
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                }
                if (mini_is_gsharedvt_type (ret_type)) {
                        cinfo->ret.storage = ArgGsharedvtVariableInReg;
                        break;
                }
                /* fall through */
        case MONO_TYPE_VALUETYPE:
        case MONO_TYPE_TYPEDBYREF: {
                guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                add_valuetype (sig, &cinfo->ret, ret_type, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                g_assert (cinfo->ret.storage != ArgInIReg);
                break;
        }
        case MONO_TYPE_VAR:
        case MONO_TYPE_MVAR:
                g_assert (mini_is_gsharedvt_type (ret_type));
                cinfo->ret.storage = ArgGsharedvtVariableInReg;
                break;
        case MONO_TYPE_VOID:
                break;
        default:
                g_error ("Can't handle as return value 0x%x", ret_type->type);
        }

        pstart = 0;
        /*
         * To simplify get_this_arg_reg () and LLVM integration, emit the vret arg after
         * the first argument, allowing 'this' to be always passed in the first arg reg.
         * Also do this if the first argument is a reference type, since virtual calls
         * are sometimes made using calli without sig->hasthis set, like in the delegate
         * invoke wrappers.
         */
        ArgStorage ret_storage = cinfo->ret.storage;
        if ((ret_storage == ArgValuetypeAddrInIReg || ret_storage == ArgGsharedvtVariableInReg) && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_get_underlying_type (sig->params [0]))))) {
                if (sig->hasthis) {
                        add_general (&gr, &stack_size, cinfo->args + 0);
                } else {
                        add_general (&gr, &stack_size, &cinfo->args [sig->hasthis + 0]);
                        pstart = 1;
                }
                add_general (&gr, &stack_size, &cinfo->ret);
                cinfo->ret.storage = ret_storage;
                cinfo->vret_arg_index = 1;
        } else {
                /* this */
                if (sig->hasthis)
                        add_general (&gr, &stack_size, cinfo->args + 0);

                if (ret_storage == ArgValuetypeAddrInIReg || ret_storage == ArgGsharedvtVariableInReg) {
                        add_general (&gr, &stack_size, &cinfo->ret);
                        cinfo->ret.storage = ret_storage;
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = pstart; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

#ifdef TARGET_WIN32
                /* The float param registers and other param registers must be the same index on Windows x64. */
                if (gr > fr)
                        fr = gr;
                else if (fr > gr)
                        gr = fr;
#endif

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                ptype = mini_get_underlying_type (sig->params [i]);
                switch (ptype->type) {
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (ptype)) {
                                add_general (&gr, &stack_size, ainfo);
                                break;
                        }
                        if (mini_is_gsharedvt_variable_type (ptype)) {
                                /* gsharedvt arguments are passed by ref */
                                add_general (&gr, &stack_size, ainfo);
                                if (ainfo->storage == ArgInIReg)
                                        ainfo->storage = ArgGSharedVtInReg;
                                else
                                        ainfo->storage = ArgGSharedVtOnStack;
                                break;
                        }
                        /* fall through */
                case MONO_TYPE_VALUETYPE:
                case MONO_TYPE_TYPEDBYREF:
                        add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                case MONO_TYPE_VAR:
                case MONO_TYPE_MVAR:
                        /* gsharedvt arguments are passed by ref */
                        g_assert (mini_is_gsharedvt_type (ptype));
                        add_general (&gr, &stack_size, ainfo);
                        if (ainfo->storage == ArgInIReg)
                                ainfo->storage = ArgGSharedVtInReg;
                        else
                                ainfo->storage = ArgGSharedVtOnStack;
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}
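
/*
 * As a sketch of the resulting CallInfo for a simple managed signature on
 * SysV, e.g. 'int Foo (object obj, int a, double d)':
 *
 *   args [0] (obj) -> ArgInIReg         %rdi
 *   args [1] (a)   -> ArgInIReg         %rsi
 *   args [2] (d)   -> ArgInDoubleSSEReg %xmm0
 *   ret            -> ArgInIReg         %rax
 *
 * with stack_usage == 0, since everything fits in registers.
 */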

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;
        CallInfo *cinfo = get_call_info (NULL, csig);
        guint32 args_size = cinfo->stack_usage;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        g_free (cinfo);

        return args_size;
}

gboolean
mono_arch_tail_call_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig)
{
        CallInfo *c1, *c2;
        gboolean res;
        MonoType *callee_ret;

        c1 = get_call_info (NULL, caller_sig);
        c2 = get_call_info (NULL, callee_sig);
        res = c1->stack_usage >= c2->stack_usage;
        callee_ret = mini_get_underlying_type (callee_sig->ret);
        if (callee_ret && MONO_TYPE_ISSTRUCT (callee_ret) && c2->ret.storage != ArgValuetypeInReg)
                /* An address on the callee's stack is passed as the first argument */
                res = FALSE;

        g_free (c1);
        g_free (c2);

        return res;
}
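
/*
 * Example of the return-storage restriction above: a callee returning
 * 'struct { long a, b, c; }' (24 bytes) receives a hidden valuetype address
 * as its first argument, so the tail call is rejected even when the stack
 * usage check passes.
 */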

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
#ifndef _MSC_VER
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
        /* TODO: This is crashing on Win64 right now.
         * _control87 (_PC_53, MCW_PC);
         */
#endif
}
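
/*
 * Concretely (assuming the usual x87 encodings): the control word defaults
 * to 0x037f (64-bit extended precision); clearing the precision control
 * bits (X86_FPCW_PRECC_MASK, 0x0300) and setting X86_FPCW_PREC_DOUBLE
 * (0x0200) yields 0x027f, i.e. 53-bit double precision.
 */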

/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
        mono_os_mutex_init_recursive (&mini_arch_mutex);

        mono_aot_register_jit_icall ("mono_amd64_throw_exception", mono_amd64_throw_exception);
        mono_aot_register_jit_icall ("mono_amd64_throw_corlib_exception", mono_amd64_throw_corlib_exception);
        mono_aot_register_jit_icall ("mono_amd64_resume_unwind", mono_amd64_resume_unwind);
        mono_aot_register_jit_icall ("mono_amd64_get_original_ip", mono_amd64_get_original_ip);
#if defined(MONO_ARCH_GSHAREDVT_SUPPORTED)
        mono_aot_register_jit_icall ("mono_amd64_start_gsharedvt_call", mono_amd64_start_gsharedvt_call);
#endif

        if (!mono_aot_only)
                bp_trampoline = mini_get_breakpoint_trampoline ();
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
        mono_os_mutex_destroy (&mini_arch_mutex);
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizations (guint32 *exclude_mask)
{
        guint32 opts = 0;

        *exclude_mask = 0;

        if (mono_hwcap_x86_has_cmov) {
                opts |= MONO_OPT_CMOV;

                if (mono_hwcap_x86_has_fcmov)
                        opts |= MONO_OPT_FCMOV;
                else
                        *exclude_mask |= MONO_OPT_FCMOV;
        } else {
                *exclude_mask |= MONO_OPT_CMOV;
        }

        return opts;
}

/*
 * This function tests which SSE versions are supported.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
        guint32 sse_opts = 0;

        if (mono_hwcap_x86_has_sse1)
                sse_opts |= SIMD_VERSION_SSE1;

        if (mono_hwcap_x86_has_sse2)
                sse_opts |= SIMD_VERSION_SSE2;

        if (mono_hwcap_x86_has_sse3)
                sse_opts |= SIMD_VERSION_SSE3;

        if (mono_hwcap_x86_has_ssse3)
                sse_opts |= SIMD_VERSION_SSSE3;

        if (mono_hwcap_x86_has_sse41)
                sse_opts |= SIMD_VERSION_SSE41;

        if (mono_hwcap_x86_has_sse42)
                sse_opts |= SIMD_VERSION_SSE42;

        if (mono_hwcap_x86_has_sse4a)
                sse_opts |= SIMD_VERSION_SSE4a;

        return sse_opts;
}

#ifndef DISABLE_JIT

GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                if (mono_is_regsize_var (ins->inst_vtype)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}

/**
 * mono_arch_compute_omit_fp:
 *
 *   Determine whether the frame pointer can be eliminated.
 */
static void
mono_arch_compute_omit_fp (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        int i, locals_size;
        CallInfo *cinfo;

        if (cfg->arch.omit_fp_computed)
                return;

        header = cfg->header;

        sig = mono_method_signature (cfg->method);

        if (!cfg->arch.cinfo)
                cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
        cinfo = (CallInfo *)cfg->arch.cinfo;

        /*
         * FIXME: Remove some of the restrictions.
         */
        cfg->arch.omit_fp = TRUE;
        cfg->arch.omit_fp_computed = TRUE;

        if (cfg->disable_omit_fp)
                cfg->arch.omit_fp = FALSE;

        if (!debug_omit_fp ())
                cfg->arch.omit_fp = FALSE;
        /*
        if (cfg->method->save_lmf)
                cfg->arch.omit_fp = FALSE;
        */
        if (cfg->flags & MONO_CFG_HAS_ALLOCA)
                cfg->arch.omit_fp = FALSE;
        if (header->num_clauses)
                cfg->arch.omit_fp = FALSE;
        if (cfg->param_area)
                cfg->arch.omit_fp = FALSE;
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                cfg->arch.omit_fp = FALSE;
        if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
                (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
                cfg->arch.omit_fp = FALSE;
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];

                if (ainfo->storage == ArgOnStack) {
                        /*
                         * The stack offset can only be determined when the frame
                         * size is known.
                         */
                        cfg->arch.omit_fp = FALSE;
                }
        }

        locals_size = 0;
        for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                int ialign;

                locals_size += mono_type_size (ins->inst_vtype, &ialign);
        }
}

GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
        GList *regs = NULL;

        mono_arch_compute_omit_fp (cfg);

        if (cfg->arch.omit_fp)
                regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

        /* We use the callee saved registers for global allocation */
        regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
        regs = g_list_prepend (regs, (gpointer)AMD64_R12);
        regs = g_list_prepend (regs, (gpointer)AMD64_R13);
        regs = g_list_prepend (regs, (gpointer)AMD64_R14);
        regs = g_list_prepend (regs, (gpointer)AMD64_R15);
#ifdef TARGET_WIN32
        regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
        regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
#endif

        return regs;
}
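
/*
 * The TARGET_WIN32 branch above reflects the Windows x64 ABI, where %rdi
 * and %rsi are callee saved (on SysV they are the first two argument
 * registers), so they only become available for global allocation there.
 */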

GList*
mono_arch_get_global_fp_regs (MonoCompile *cfg)
{
        GList *regs = NULL;
        int i;

        /* All XMM registers */
        for (i = 0; i < 16; ++i)
                regs = g_list_prepend (regs, GINT_TO_POINTER (i));

        return regs;
}

GList*
mono_arch_get_iregs_clobbered_by_call (MonoCallInst *call)
{
        static GList *r = NULL;

        if (r == NULL) {
                GList *regs = NULL;

                regs = g_list_prepend (regs, (gpointer)AMD64_RBP);
                regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
                regs = g_list_prepend (regs, (gpointer)AMD64_R12);
                regs = g_list_prepend (regs, (gpointer)AMD64_R13);
                regs = g_list_prepend (regs, (gpointer)AMD64_R14);
                regs = g_list_prepend (regs, (gpointer)AMD64_R15);

                regs = g_list_prepend (regs, (gpointer)AMD64_R10);
                regs = g_list_prepend (regs, (gpointer)AMD64_R9);
                regs = g_list_prepend (regs, (gpointer)AMD64_R8);
                regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
                regs = g_list_prepend (regs, (gpointer)AMD64_RDX);
                regs = g_list_prepend (regs, (gpointer)AMD64_RCX);
                regs = g_list_prepend (regs, (gpointer)AMD64_RAX);

                InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
        }

        return r;
}

GList*
mono_arch_get_fregs_clobbered_by_call (MonoCallInst *call)
{
        int i;
        static GList *r = NULL;

        if (r == NULL) {
                GList *regs = NULL;

                for (i = 0; i < AMD64_XMM_NREG; ++i)
                        regs = g_list_prepend (regs, GINT_TO_POINTER (MONO_MAX_IREGS + i));

                InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
        }

        return r;
}

/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
        MonoInst *ins = cfg->varinfo [vmv->idx];

        if (cfg->method->save_lmf)
                /* The register is already saved */
                /* subtract 1 for the invisible store in the prolog */
                return (ins->opcode == OP_ARG) ? 0 : 1;
        else
                /* push+pop */
                return (ins->opcode == OP_ARG) ? 1 : 2;
}
1414
1415 /*
1416  * mono_arch_fill_argument_info:
1417  *
1418  *   Populate cfg->args, cfg->ret and cfg->vret_addr with information about the arguments
1419  * of the method.
1420  */
1421 void
1422 mono_arch_fill_argument_info (MonoCompile *cfg)
1423 {
1424         MonoType *sig_ret;
1425         MonoMethodSignature *sig;
1426         MonoInst *ins;
1427         int i;
1428         CallInfo *cinfo;
1429
1430         sig = mono_method_signature (cfg->method);
1431
1432         cinfo = (CallInfo *)cfg->arch.cinfo;
1433         sig_ret = mini_get_underlying_type (sig->ret);
1434
1435         /*
1436          * Contrary to mono_arch_allocate_vars (), the information should describe
1437          * where the arguments are at the beginning of the method, not where they can be 
1438          * accessed during the execution of the method. The later makes no sense for the 
1439          * global register allocator, since a variable can be in more than one location.
1440          */
1441         switch (cinfo->ret.storage) {
1442         case ArgInIReg:
1443         case ArgInFloatSSEReg:
1444         case ArgInDoubleSSEReg:
1445                 cfg->ret->opcode = OP_REGVAR;
1446                 cfg->ret->inst_c0 = cinfo->ret.reg;
1447                 break;
1448         case ArgValuetypeInReg:
1449                 cfg->ret->opcode = OP_REGOFFSET;
1450                 cfg->ret->inst_basereg = -1;
1451                 cfg->ret->inst_offset = -1;
1452                 break;
1453         case ArgNone:
1454                 break;
1455         default:
1456                 g_assert_not_reached ();
1457         }
1458
1459         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1460                 ArgInfo *ainfo = &cinfo->args [i];
1461
1462                 ins = cfg->args [i];
1463
1464                 switch (ainfo->storage) {
1465                 case ArgInIReg:
1466                 case ArgInFloatSSEReg:
1467                 case ArgInDoubleSSEReg:
1468                         ins->opcode = OP_REGVAR;
1469                         ins->inst_c0 = ainfo->reg;
1470                         break;
1471                 case ArgOnStack:
1472                         ins->opcode = OP_REGOFFSET;
1473                         ins->inst_basereg = -1;
1474                         ins->inst_offset = -1;
1475                         break;
1476                 case ArgValuetypeInReg:
1477                         /* Dummy */
1478                         ins->opcode = OP_NOP;
1479                         break;
1480                 default:
1481                         g_assert_not_reached ();
1482                 }
1483         }
1484 }
1485  
1486 void
1487 mono_arch_allocate_vars (MonoCompile *cfg)
1488 {
1489         MonoType *sig_ret;
1490         MonoMethodSignature *sig;
1491         MonoInst *ins;
1492         int i, offset;
1493         guint32 locals_stack_size, locals_stack_align;
1494         gint32 *offsets;
1495         CallInfo *cinfo;
1496
1497         sig = mono_method_signature (cfg->method);
1498
1499         cinfo = (CallInfo *)cfg->arch.cinfo;
1500         sig_ret = mini_get_underlying_type (sig->ret);
1501
1502         mono_arch_compute_omit_fp (cfg);
1503
1504         /*
1505          * We use the ABI calling conventions for managed code as well.
1506          * Exception: valuetypes are only sometimes passed or returned in registers.
1507          */
1508
1509         /*
1510          * The stack looks like this:
1511          * <incoming arguments passed on the stack>
1512          * <return value>
1513          * <lmf/caller saved registers>
1514          * <locals>
1515          * <spill area>
1516          * <localloc area>  -> grows dynamically
1517          * <params area>
1518          */
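
        /*
         * Illustrative example (assumed values): with %rbp as the frame
         * register, two callee-saved registers to preserve and 16 bytes of
         * locals, the register save area ends up at -16(%rbp) and the locals
         * occupy -32(%rbp)..-17(%rbp); spill/localloc/params space follows
         * below that.
         */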
1519
1520         if (cfg->arch.omit_fp) {
1521                 cfg->flags |= MONO_CFG_HAS_SPILLUP;
1522                 cfg->frame_reg = AMD64_RSP;
1523                 offset = 0;
1524         } else {
1525                 /* Locals are allocated backwards from %fp */
1526                 cfg->frame_reg = AMD64_RBP;
1527                 offset = 0;
1528         }
1529
1530         cfg->arch.saved_iregs = cfg->used_int_regs;
1531         if (cfg->method->save_lmf)
1532                 /* Save all callee-saved registers normally, and restore them when unwinding through an LMF */
1533                 cfg->arch.saved_iregs |= (1 << AMD64_RBX) | (1 << AMD64_R12) | (1 << AMD64_R13) | (1 << AMD64_R14) | (1 << AMD64_R15);
1534
1535         if (cfg->arch.omit_fp)
1536                 cfg->arch.reg_save_area_offset = offset;
1537         /* Reserve space for callee saved registers */
1538         for (i = 0; i < AMD64_NREG; ++i)
1539                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
1540                         offset += sizeof(mgreg_t);
1541                 }
1542         if (!cfg->arch.omit_fp)
1543                 cfg->arch.reg_save_area_offset = -offset;
1544
1545         if (sig_ret->type != MONO_TYPE_VOID) {
1546                 switch (cinfo->ret.storage) {
1547                 case ArgInIReg:
1548                 case ArgInFloatSSEReg:
1549                 case ArgInDoubleSSEReg:
1550                         cfg->ret->opcode = OP_REGVAR;
1551                         cfg->ret->inst_c0 = cinfo->ret.reg;
1552                         cfg->ret->dreg = cinfo->ret.reg;
1553                         break;
1554                 case ArgValuetypeAddrInIReg:
1555                 case ArgGsharedvtVariableInReg:
1556                         /* The register is volatile */
1557                         cfg->vret_addr->opcode = OP_REGOFFSET;
1558                         cfg->vret_addr->inst_basereg = cfg->frame_reg;
1559                         if (cfg->arch.omit_fp) {
1560                                 cfg->vret_addr->inst_offset = offset;
1561                                 offset += 8;
1562                         } else {
1563                                 offset += 8;
1564                                 cfg->vret_addr->inst_offset = -offset;
1565                         }
1566                         if (G_UNLIKELY (cfg->verbose_level > 1)) {
1567                                 printf ("vret_addr =");
1568                                 mono_print_ins (cfg->vret_addr);
1569                         }
1570                         break;
1571                 case ArgValuetypeInReg:
1572                         /* Allocate a local to hold the result, the epilog will copy it to the correct place */
1573                         cfg->ret->opcode = OP_REGOFFSET;
1574                         cfg->ret->inst_basereg = cfg->frame_reg;
1575                         if (cfg->arch.omit_fp) {
1576                                 cfg->ret->inst_offset = offset;
1577                                 offset += cinfo->ret.pair_storage [1] == ArgNone ? 8 : 16;
1578                         } else {
1579                                 offset += cinfo->ret.pair_storage [1] == ArgNone ? 8 : 16;
1580                                 cfg->ret->inst_offset = - offset;
1581                         }
1582                         break;
1583                 default:
1584                         g_assert_not_reached ();
1585                 }
1586         }
1587
1588         /* Allocate locals */
1589         offsets = mono_allocate_stack_slots (cfg, cfg->arch.omit_fp ? FALSE: TRUE, &locals_stack_size, &locals_stack_align);
1590         if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
1591                 char *mname = mono_method_full_name (cfg->method, TRUE);
1592                 mono_cfg_set_exception_invalid_program (cfg, g_strdup_printf ("Method %s stack is too big.", mname));
1593                 g_free (mname);
1594                 return;
1595         }
1596                 
1597         if (locals_stack_align) {
1598                 offset += (locals_stack_align - 1);
1599                 offset &= ~(locals_stack_align - 1);
1600         }
1601         if (cfg->arch.omit_fp) {
1602                 cfg->locals_min_stack_offset = offset;
1603                 cfg->locals_max_stack_offset = offset + locals_stack_size;
1604         } else {
1605                 cfg->locals_min_stack_offset = - (offset + locals_stack_size);
1606                 cfg->locals_max_stack_offset = - offset;
1607         }
1608                 
1609         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
1610                 if (offsets [i] != -1) {
1611                         MonoInst *ins = cfg->varinfo [i];
1612                         ins->opcode = OP_REGOFFSET;
1613                         ins->inst_basereg = cfg->frame_reg;
1614                         if (cfg->arch.omit_fp)
1615                                 ins->inst_offset = (offset + offsets [i]);
1616                         else
1617                                 ins->inst_offset = - (offset + offsets [i]);
1618                         //printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
1619                 }
1620         }
1621         offset += locals_stack_size;
1622
1623         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
1624                 g_assert (!cfg->arch.omit_fp);
1625                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
1626                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
1627         }
1628
1629         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1630                 ins = cfg->args [i];
1631                 if (ins->opcode != OP_REGVAR) {
1632                         ArgInfo *ainfo = &cinfo->args [i];
1633                         gboolean inreg = TRUE;
1634
1635                         /* FIXME: Allocate volatile arguments to registers */
1636                         if (ins->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
1637                                 inreg = FALSE;
1638
1639                         /* 
1640                          * Under AMD64, all registers used to pass arguments to functions
1641                          * are volatile across calls.
1642                          * FIXME: Optimize this.
1643                          */
1644                         if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg) || (ainfo->storage == ArgGSharedVtInReg))
1645                                 inreg = FALSE;
1646
1647                         ins->opcode = OP_REGOFFSET;
1648
1649                         switch (ainfo->storage) {
1650                         case ArgInIReg:
1651                         case ArgInFloatSSEReg:
1652                         case ArgInDoubleSSEReg:
1653                         case ArgGSharedVtInReg:
1654                                 if (inreg) {
1655                                         ins->opcode = OP_REGVAR;
1656                                         ins->dreg = ainfo->reg;
1657                                 }
1658                                 break;
1659                         case ArgOnStack:
1660                         case ArgGSharedVtOnStack:
1661                                 g_assert (!cfg->arch.omit_fp);
1662                                 ins->opcode = OP_REGOFFSET;
1663                                 ins->inst_basereg = cfg->frame_reg;
1664                                 ins->inst_offset = ainfo->offset + ARGS_OFFSET;
1665                                 break;
1666                         case ArgValuetypeInReg:
1667                                 break;
1668                         case ArgValuetypeAddrInIReg: {
1669                                 MonoInst *indir;
1670                                 g_assert (!cfg->arch.omit_fp);
1671                                 
1672                                 MONO_INST_NEW (cfg, indir, 0);
1673                                 indir->opcode = OP_REGOFFSET;
1674                                 if (ainfo->pair_storage [0] == ArgInIReg) {
1675                                         indir->inst_basereg = cfg->frame_reg;
1676                                         offset = ALIGN_TO (offset, sizeof (gpointer));
1677                                         offset += (sizeof (gpointer));
1678                                         indir->inst_offset = - offset;
1679                                 }
1680                                 else {
1681                                         indir->inst_basereg = cfg->frame_reg;
1682                                         indir->inst_offset = ainfo->offset + ARGS_OFFSET;
1683                                 }
1684                                 
1685                                 ins->opcode = OP_VTARG_ADDR;
1686                                 ins->inst_left = indir;
1687                                 
1688                                 break;
1689                         }
1690                         default:
1691                                 NOT_IMPLEMENTED;
1692                         }
1693
1694                         if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg) && (ainfo->storage != ArgGSharedVtOnStack)) {
1695                                 ins->opcode = OP_REGOFFSET;
1696                                 ins->inst_basereg = cfg->frame_reg;
1697                                 /* These arguments are saved to the stack in the prolog */
1698                                 offset = ALIGN_TO (offset, sizeof(mgreg_t));
1699                                 if (cfg->arch.omit_fp) {
1700                                         ins->inst_offset = offset;
1701                                         offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t);
1702                                         // Arguments are not yet supported by the stack map creation code
1703                                         //cfg->locals_max_stack_offset = MAX (cfg->locals_max_stack_offset, offset);
1704                                 } else {
1705                                         offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t);
1706                                         ins->inst_offset = - offset;
1707                                         //cfg->locals_min_stack_offset = MIN (cfg->locals_min_stack_offset, offset);
1708                                 }
1709                         }
1710                 }
1711         }
1712
1713         cfg->stack_offset = offset;
1714 }
1715
1716 void
1717 mono_arch_create_vars (MonoCompile *cfg)
1718 {
1719         MonoMethodSignature *sig;
1720         CallInfo *cinfo;
1721         MonoType *sig_ret;
1722
1723         sig = mono_method_signature (cfg->method);
1724
1725         if (!cfg->arch.cinfo)
1726                 cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
1727         cinfo = (CallInfo *)cfg->arch.cinfo;
1728
1729         if (cinfo->ret.storage == ArgValuetypeInReg)
1730                 cfg->ret_var_is_local = TRUE;
1731
1732         sig_ret = mini_get_underlying_type (sig->ret);
1733         if (cinfo->ret.storage == ArgValuetypeAddrInIReg || cinfo->ret.storage == ArgGsharedvtVariableInReg) {
1734                 cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
1735                 if (G_UNLIKELY (cfg->verbose_level > 1)) {
1736                         printf ("vret_addr = ");
1737                         mono_print_ins (cfg->vret_addr);
1738                 }
1739         }
1740
1741         if (cfg->gen_sdb_seq_points) {
1742                 MonoInst *ins;
1743
1744                 if (cfg->compile_aot) {
1745                         ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1746                         ins->flags |= MONO_INST_VOLATILE;
1747                         cfg->arch.seq_point_info_var = ins;
1748                 }
1749                 ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1750                 ins->flags |= MONO_INST_VOLATILE;
1751                 cfg->arch.ss_tramp_var = ins;
1752
1753                 ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1754                 ins->flags |= MONO_INST_VOLATILE;
1755                 cfg->arch.bp_tramp_var = ins;
1756         }
1757
1758         if (cfg->method->save_lmf) {
1759                 cfg->create_lmf_var = TRUE;
1762                 cfg->lmf_ir = TRUE;
1763 #if !defined(TARGET_WIN32)
1764                 if (mono_get_lmf_tls_offset () != -1 && !optimize_for_xen)
1765                         cfg->lmf_ir_mono_lmf = TRUE;
1766 #endif
1767         }
1768 }
1769
1770 static void
1771 add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *tree)
1772 {
1773         MonoInst *ins;
1774
1775         switch (storage) {
1776         case ArgInIReg:
1777                 MONO_INST_NEW (cfg, ins, OP_MOVE);
1778                 ins->dreg = mono_alloc_ireg_copy (cfg, tree->dreg);
1779                 ins->sreg1 = tree->dreg;
1780                 MONO_ADD_INS (cfg->cbb, ins);
1781                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
1782                 break;
1783         case ArgInFloatSSEReg:
1784                 MONO_INST_NEW (cfg, ins, OP_AMD64_SET_XMMREG_R4);
1785                 ins->dreg = mono_alloc_freg (cfg);
1786                 ins->sreg1 = tree->dreg;
1787                 MONO_ADD_INS (cfg->cbb, ins);
1788
1789                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
1790                 break;
1791         case ArgInDoubleSSEReg:
1792                 MONO_INST_NEW (cfg, ins, OP_FMOVE);
1793                 ins->dreg = mono_alloc_freg (cfg);
1794                 ins->sreg1 = tree->dreg;
1795                 MONO_ADD_INS (cfg->cbb, ins);
1796
1797                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
1798
1799                 break;
1800         default:
1801                 g_assert_not_reached ();
1802         }
1803 }
1804
1805 static int
1806 arg_storage_to_load_membase (ArgStorage storage)
1807 {
1808         switch (storage) {
1809         case ArgInIReg:
1810 #if defined(__mono_ilp32__)
1811                 return OP_LOADI8_MEMBASE;
1812 #else
1813                 return OP_LOAD_MEMBASE;
1814 #endif
1815         case ArgInDoubleSSEReg:
1816                 return OP_LOADR8_MEMBASE;
1817         case ArgInFloatSSEReg:
1818                 return OP_LOADR4_MEMBASE;
1819         default:
1820                 g_assert_not_reached ();
1821         }
1822
1823         return -1;
1824 }
1825
1826 static void
1827 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
1828 {
1829         MonoMethodSignature *tmp_sig;
1830         int sig_reg;
1831
1832         if (call->tail_call)
1833                 NOT_IMPLEMENTED;
1834
1835         g_assert (cinfo->sig_cookie.storage == ArgOnStack);
1836                         
1837         /*
1838          * mono_ArgIterator_Setup assumes the signature cookie is 
1839          * passed first and all the arguments which were before it are
1840          * passed on the stack after the signature. So compensate by 
1841          * passing a different signature.
1842          */
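        /*
         * Worked example (hypothetical signature): for a call site
         *     m (1, "s", __arglist (1.0, 2.0))
         * with sentinelpos == 2, tmp_sig describes only the two trailing
         * doubles, so mono_ArgIterator_Setup sees the cookie followed by
         * just the implicit arguments.
         */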
1843         tmp_sig = mono_metadata_signature_dup_full (cfg->method->klass->image, call->signature);
1844         tmp_sig->param_count -= call->signature->sentinelpos;
1845         tmp_sig->sentinelpos = 0;
1846         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
1847
1848         sig_reg = mono_alloc_ireg (cfg);
1849         MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig);
1850
1851         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, cinfo->sig_cookie.offset, sig_reg);
1852 }
1853
1854 #ifdef ENABLE_LLVM
1855 static inline LLVMArgStorage
1856 arg_storage_to_llvm_arg_storage (MonoCompile *cfg, ArgStorage storage)
1857 {
1858         switch (storage) {
1859         case ArgInIReg:
1860                 return LLVMArgInIReg;
1861         case ArgNone:
1862                 return LLVMArgNone;
1863         case ArgGSharedVtInReg:
1864         case ArgGSharedVtOnStack:
1865                 return LLVMArgGSharedVt;
1866         default:
1867                 g_assert_not_reached ();
1868                 return LLVMArgNone;
1869         }
1870 }
1871
1872 LLVMCallInfo*
1873 mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
1874 {
1875         int i, n;
1876         CallInfo *cinfo;
1877         ArgInfo *ainfo;
1878         int j;
1879         LLVMCallInfo *linfo;
1880         MonoType *t, *sig_ret;
1881
1882         n = sig->param_count + sig->hasthis;
1883         sig_ret = mini_get_underlying_type (sig->ret);
1884
1885         cinfo = get_call_info (cfg->mempool, sig);
1886
1887         linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
1888
1889         /*
1890          * LLVM always uses the native ABI while we use our own ABI; the
1891          * only difference is the handling of vtypes:
1892          * - we only pass/receive them in registers in some cases, and only 
1893          *   in 1 or 2 integer registers.
1894          */
1895         switch (cinfo->ret.storage) {
1896         case ArgNone:
1897                 linfo->ret.storage = LLVMArgNone;
1898                 break;
1899         case ArgInIReg:
1900         case ArgInFloatSSEReg:
1901         case ArgInDoubleSSEReg:
1902                 linfo->ret.storage = LLVMArgNormal;
1903                 break;
1904         case ArgValuetypeInReg: {
1905                 ainfo = &cinfo->ret;
1906
1907                 if (sig->pinvoke &&
1908                         (ainfo->pair_storage [0] == ArgInFloatSSEReg || ainfo->pair_storage [0] == ArgInDoubleSSEReg ||
1909                          ainfo->pair_storage [1] == ArgInFloatSSEReg || ainfo->pair_storage [1] == ArgInDoubleSSEReg)) {
1910                         cfg->exception_message = g_strdup ("pinvoke + vtype ret");
1911                         cfg->disable_llvm = TRUE;
1912                         return linfo;
1913                 }
1914
1915                 linfo->ret.storage = LLVMArgVtypeInReg;
1916                 for (j = 0; j < 2; ++j)
1917                         linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
1918                 break;
1919         }
1920         case ArgValuetypeAddrInIReg:
1921         case ArgGsharedvtVariableInReg:
1922                 /* Vtype returned using a hidden argument */
1923                 linfo->ret.storage = LLVMArgVtypeRetAddr;
1924                 linfo->vret_arg_index = cinfo->vret_arg_index;
1925                 break;
1926         default:
1927                 g_assert_not_reached ();
1928                 break;
1929         }
1930
1931         for (i = 0; i < n; ++i) {
1932                 ainfo = cinfo->args + i;
1933
1934                 if (i >= sig->hasthis)
1935                         t = sig->params [i - sig->hasthis];
1936                 else
1937                         t = &mono_defaults.int_class->byval_arg;
1938
1939                 linfo->args [i].storage = LLVMArgNone;
1940
1941                 switch (ainfo->storage) {
1942                 case ArgInIReg:
1943                         linfo->args [i].storage = LLVMArgNormal;
1944                         break;
1945                 case ArgInDoubleSSEReg:
1946                 case ArgInFloatSSEReg:
1947                         linfo->args [i].storage = LLVMArgNormal;
1948                         break;
1949                 case ArgOnStack:
1950                         if (MONO_TYPE_ISSTRUCT (t))
1951                                 linfo->args [i].storage = LLVMArgVtypeByVal;
1952                         else
1953                                 linfo->args [i].storage = LLVMArgNormal;
1954                         break;
1955                 case ArgValuetypeInReg:
1956                         if (sig->pinvoke &&
1957                                 (ainfo->pair_storage [0] == ArgInFloatSSEReg || ainfo->pair_storage [0] == ArgInDoubleSSEReg ||
1958                                  ainfo->pair_storage [1] == ArgInFloatSSEReg || ainfo->pair_storage [1] == ArgInDoubleSSEReg)) {
1959                                 cfg->exception_message = g_strdup ("pinvoke + vtypes");
1960                                 cfg->disable_llvm = TRUE;
1961                                 return linfo;
1962                         }
1963
1964                         linfo->args [i].storage = LLVMArgVtypeInReg;
1965                         for (j = 0; j < 2; ++j)
1966                                 linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
1967                         break;
1968                 case ArgGSharedVtInReg:
1969                 case ArgGSharedVtOnStack:
1970                         linfo->args [i].storage = LLVMArgGSharedVt;
1971                         break;
1972                 default:
1973                         cfg->exception_message = g_strdup ("ainfo->storage");
1974                         cfg->disable_llvm = TRUE;
1975                         break;
1976                 }
1977         }
1978
1979         return linfo;
1980 }
1981 #endif
1982
1983 void
1984 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
1985 {
1986         MonoInst *arg, *in;
1987         MonoMethodSignature *sig;
1988         MonoType *sig_ret;
1989         int i, n;
1990         CallInfo *cinfo;
1991         ArgInfo *ainfo;
1992
1993         sig = call->signature;
1994         n = sig->param_count + sig->hasthis;
1995
1996         cinfo = get_call_info (cfg->mempool, sig);
1997
1998         sig_ret = sig->ret;
1999
2000         if (COMPILE_LLVM (cfg)) {
2001                 /* We shouldn't be called in the llvm case */
2002                 cfg->disable_llvm = TRUE;
2003                 return;
2004         }
2005
2006         /* 
2007          * Emit all arguments which are passed on the stack to prevent register
2008          * allocation problems.
2009          */
2010         for (i = 0; i < n; ++i) {
2011                 MonoType *t;
2012                 ainfo = cinfo->args + i;
2013
2014                 in = call->args [i];
2015
2016                 if (sig->hasthis && i == 0)
2017                         t = &mono_defaults.object_class->byval_arg;
2018                 else
2019                         t = sig->params [i - sig->hasthis];
2020
2021                 t = mini_get_underlying_type (t);
2022                 //XXX what about ArgGSharedVtOnStack here?
2023                 if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call) {
2024                         if (!t->byref) {
2025                                 if (t->type == MONO_TYPE_R4)
2026                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2027                                 else if (t->type == MONO_TYPE_R8)
2028                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2029                                 else
2030                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2031                         } else {
2032                                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2033                         }
2034                         if (cfg->compute_gc_maps) {
2035                                 MonoInst *def;
2036
2037                                 EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, ainfo->offset, t);
2038                         }
2039                 }
2040         }
2041
2042         /*
2043          * Emit all parameters passed in registers in non-reverse order for better readability
2044          * and to help the optimization in emit_prolog ().
2045          */
2046         for (i = 0; i < n; ++i) {
2047                 ainfo = cinfo->args + i;
2048
2049                 in = call->args [i];
2050
2051                 if (ainfo->storage == ArgInIReg)
2052                         add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
2053         }
2054
2055         for (i = n - 1; i >= 0; --i) {
2056                 MonoType *t;
2057
2058                 ainfo = cinfo->args + i;
2059
2060                 in = call->args [i];
2061
2062                 if (sig->hasthis && i == 0)
2063                         t = &mono_defaults.object_class->byval_arg;
2064                 else
2065                         t = sig->params [i - sig->hasthis];
2066                 t = mini_get_underlying_type (t);
2067
2068                 switch (ainfo->storage) {
2069                 case ArgInIReg:
2070                         /* Already done */
2071                         break;
2072                 case ArgInFloatSSEReg:
2073                 case ArgInDoubleSSEReg:
2074                         add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
2075                         break;
2076                 case ArgOnStack:
2077                 case ArgValuetypeInReg:
2078                 case ArgValuetypeAddrInIReg:
2079                 case ArgGSharedVtInReg:
2080                 case ArgGSharedVtOnStack: {
2081                         if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call)
2082                                 /* Already emitted above */
2083                                 break;
2084                         //FIXME what about ArgGSharedVtOnStack ?
2085                         if (ainfo->storage == ArgOnStack && call->tail_call) {
2086                                 MonoInst *call_inst = (MonoInst*)call;
2087                                 cfg->args [i]->flags |= MONO_INST_VOLATILE;
2088                                 EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
2089                                 break;
2090                         }
2091
2092                         guint32 align;
2093                         guint32 size;
2094
2095                         if (sig->pinvoke)
2096                                 size = mono_type_native_stack_size (t, &align);
2097                         else {
2098                                 /*
2099                                  * Other backends use mono_type_stack_size (), but that
2100                                  * aligns the size to 8, which is larger than the size of
2101                                  * the source, leading to reads of invalid memory if the
2102                                  * source is at the end of address space.
2103                                  */
2104                                 size = mono_class_value_size (mono_class_from_mono_type (t), &align);
2105                         }
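                        /*
                         * e.g. (illustrative): a 12-byte struct rounded up to
                         * 16 could read 4 bytes past its end, possibly into an
                         * unmapped page.
                         */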
2106
2107                         if (size >= 10000) {
2108                                 /* Avoid asserts in emit_memcpy () */
2109                                 mono_cfg_set_exception_invalid_program (cfg, g_strdup_printf ("Passing an argument of size '%d'.", size));
2110                                 /* Continue normally */
2111                         }
2112
2113                         if (size > 0) {
2114                                 MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
2115                                 arg->sreg1 = in->dreg;
2116                                 arg->klass = mono_class_from_mono_type (t);
2117                                 arg->backend.size = size;
2118                                 arg->inst_p0 = call;
2119                                 arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
2120                                 memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
2121
2122                                 MONO_ADD_INS (cfg->cbb, arg);
2123                         }
2124                         break;
2125                 }
2126                 default:
2127                         g_assert_not_reached ();
2128                 }
2129
2130                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos))
2131                         /* Emit the signature cookie just before the implicit arguments */
2132                         emit_sig_cookie (cfg, call, cinfo);
2133         }
2134
2135         /* Handle the case where there are no implicit arguments */
2136         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
2137                 emit_sig_cookie (cfg, call, cinfo);
2138
2139         switch (cinfo->ret.storage) {
2140         case ArgValuetypeInReg:
2141                 if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
2142                         /*
2143                          * Tell the JIT to use a more efficient calling convention: call using
2144                          * OP_CALL, compute the result location after the call, and save the
2145                          * result there.
2146                          */
2147                         call->vret_in_reg = TRUE;
2148                         /*
2149                          * Nullify the instruction computing the vret addr to enable
2150                          * future optimizations.
2151                          */
2152                         if (call->vret_var)
2153                                 NULLIFY_INS (call->vret_var);
2154                 } else {
2155                         if (call->tail_call)
2156                                 NOT_IMPLEMENTED;
2157                         /*
2158                          * The valuetype is in RAX:RDX after the call, need to be copied to
2159                          * the stack. Push the address here, so the call instruction can
2160                          * access it.
2161                          */
2162                         if (!cfg->arch.vret_addr_loc) {
2163                                 cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
2164                                 /* Prevent it from being register allocated or optimized away */
2165                                 ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
2166                         }
2167
2168                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
2169                 }
2170                 break;
2171         case ArgValuetypeAddrInIReg:
2172         case ArgGsharedvtVariableInReg: {
2173                 MonoInst *vtarg;
2174                 MONO_INST_NEW (cfg, vtarg, OP_MOVE);
2175                 vtarg->sreg1 = call->vret_var->dreg;
2176                 vtarg->dreg = mono_alloc_preg (cfg);
2177                 MONO_ADD_INS (cfg->cbb, vtarg);
2178
2179                 mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
2180                 break;
2181         }
2182         default:
2183                 break;
2184         }
2185
2186         if (cfg->method->save_lmf) {
2187                 MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF);
2188                 MONO_ADD_INS (cfg->cbb, arg);
2189         }
2190
2191         call->stack_usage = cinfo->stack_usage;
2192 }
2193
2194 void
2195 mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
2196 {
2197         MonoInst *arg;
2198         MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
2199         ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
2200         int size = ins->backend.size;
2201
2202         switch (ainfo->storage) {
2203         case ArgValuetypeInReg: {
2204                 MonoInst *load;
2205                 int part;
2206
2207                 for (part = 0; part < 2; ++part) {
2208                         if (ainfo->pair_storage [part] == ArgNone)
2209                                 continue;
2210
2211                         MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part]));
2212                         load->inst_basereg = src->dreg;
2213                         load->inst_offset = part * sizeof(mgreg_t);
2214
2215                         switch (ainfo->pair_storage [part]) {
2216                         case ArgInIReg:
2217                                 load->dreg = mono_alloc_ireg (cfg);
2218                                 break;
2219                         case ArgInDoubleSSEReg:
2220                         case ArgInFloatSSEReg:
2221                                 load->dreg = mono_alloc_freg (cfg);
2222                                 break;
2223                         default:
2224                                 g_assert_not_reached ();
2225                         }
2226                         MONO_ADD_INS (cfg->cbb, load);
2227
2228                         add_outarg_reg (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load);
2229                 }
2230                 break;
2231         }
2232         case ArgValuetypeAddrInIReg: {
2233                 MonoInst *vtaddr, *load;
2234                 vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
2235                 
2236                 MONO_INST_NEW (cfg, load, OP_LDADDR);
2237                 cfg->has_indirection = TRUE;
2238                 load->inst_p0 = vtaddr;
2239                 vtaddr->flags |= MONO_INST_INDIRECT;
2240                 load->type = STACK_MP;
2241                 load->klass = vtaddr->klass;
2242                 load->dreg = mono_alloc_ireg (cfg);
2243                 MONO_ADD_INS (cfg->cbb, load);
2244                 mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 4);
2245
2246                 if (ainfo->pair_storage [0] == ArgInIReg) {
2247                         MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE);
2248                         arg->dreg = mono_alloc_ireg (cfg);
2249                         arg->sreg1 = load->dreg;
2250                         arg->inst_imm = 0;
2251                         MONO_ADD_INS (cfg->cbb, arg);
2252                         mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE);
2253                 } else {
2254                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, load->dreg);
2255                 }
2256                 break;
2257         }
2258         case ArgGSharedVtInReg:
2259                 /* Pass by addr */
2260                 mono_call_inst_add_outarg_reg (cfg, call, src->dreg, ainfo->reg, FALSE);
2261                 break;
2262         case ArgGSharedVtOnStack:
2263                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, src->dreg);
2264                 break;
2265         default:
2266                 if (size == 8) {
2267                         int dreg = mono_alloc_ireg (cfg);
2268
2269                         MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
2270                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, dreg);
2271                 } else if (size <= 40) {
2272                         mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
2273                 } else {
2274                         // FIXME: Code growth
2275                         mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
2276                 }
2277
2278                 if (cfg->compute_gc_maps) {
2279                         MonoInst *def;
2280                         EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, ainfo->offset, &ins->klass->byval_arg);
2281                 }
2282         }
2283 }
2284
2285 void
2286 mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
2287 {
2288         MonoType *ret = mini_get_underlying_type (mono_method_signature (method)->ret);
2289
2290         if (ret->type == MONO_TYPE_R4) {
2291                 if (COMPILE_LLVM (cfg))
2292                         MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
2293                 else
2294                         MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
2295                 return;
2296         } else if (ret->type == MONO_TYPE_R8) {
2297                 MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
2298                 return;
2299         }
2300                         
2301         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
2302 }
2303
2304 #endif /* DISABLE_JIT */
2305
2306 #define EMIT_COND_BRANCH(ins,cond,sign) \
2307         do { \
2308                 if (ins->inst_true_bb->native_offset) { \
2309                         x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
2310                 } else { \
2311                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
2312                         if ((cfg->opt & MONO_OPT_BRANCH) && \
2313                             x86_is_imm8 (ins->inst_true_bb->max_offset - offset)) \
2314                                 x86_branch8 (code, cond, 0, sign); \
2315                         else \
2316                                 x86_branch32 (code, cond, 0, sign); \
2317                 } \
2318         } while (0)
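
/*
 * When the target block has not been emitted yet, EMIT_COND_BRANCH guesses
 * the branch size from the pessimistic max_offset estimate: if the distance
 * fits in an imm8 it emits the short form, otherwise the 32-bit form, and a
 * patch entry is recorded so the real displacement is filled in later.
 */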
2317
2318 typedef struct {
2319         MonoMethodSignature *sig;
2320         CallInfo *cinfo;
2321 } ArchDynCallInfo;
2322
2323 static gboolean
2324 dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo)
2325 {
2326         int i;
2327
2328 #ifdef HOST_WIN32
2329         return FALSE;
2330 #endif
2331
2332         switch (cinfo->ret.storage) {
2333         case ArgNone:
2334         case ArgInIReg:
2335         case ArgInFloatSSEReg:
2336         case ArgInDoubleSSEReg:
2337                 break;
2338         case ArgValuetypeInReg: {
2339                 ArgInfo *ainfo = &cinfo->ret;
2340
2341                 if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
2342                         return FALSE;
2343                 if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
2344                         return FALSE;
2345                 break;
2346         }
2347         default:
2348                 return FALSE;
2349         }
2350
2351         for (i = 0; i < cinfo->nargs; ++i) {
2352                 ArgInfo *ainfo = &cinfo->args [i];
2353                 switch (ainfo->storage) {
2354                 case ArgInIReg:
2355                 case ArgInFloatSSEReg:
2356                 case ArgInDoubleSSEReg:
2357                         break;
2358                 case ArgValuetypeInReg:
2359                         if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
2360                                 return FALSE;
2361                         if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
2362                                 return FALSE;
2363                         break;
2364                 default:
2365                         return FALSE;
2366                 }
2367         }
2368
2369         return TRUE;
2370 }
2371
2372 /*
2373  * mono_arch_dyn_call_prepare:
2374  *
2375  *   Return a pointer to an arch-specific structure which contains information 
2376  * needed by mono_arch_start_dyn_call (). Return NULL if OP_DYN_CALL is not
2377  * supported for SIG.
2378  * This function is equivalent to ffi_prep_cif in libffi.
2379  */
2380 MonoDynCallInfo*
2381 mono_arch_dyn_call_prepare (MonoMethodSignature *sig)
2382 {
2383         ArchDynCallInfo *info;
2384         CallInfo *cinfo;
2385
2386         cinfo = get_call_info (NULL, sig);
2387
2388         if (!dyn_call_supported (sig, cinfo)) {
2389                 g_free (cinfo);
2390                 return NULL;
2391         }
2392
2393         info = g_new0 (ArchDynCallInfo, 1);
2394         // FIXME: Preprocess the info to speed up get_dyn_call_args ().
2395         info->sig = sig;
2396         info->cinfo = cinfo;
2397         
2398         return (MonoDynCallInfo*)info;
2399 }
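
#if 0
/*
 * Minimal usage sketch (not built): 'dyn_call_example' and its error
 * handling are assumptions for illustration; in the runtime the
 * JIT-generated OP_DYN_CALL sequence runs between start and finish.
 */
static void
dyn_call_example (MonoMethodSignature *sig, gpointer **args, guint8 *ret_buf)
{
        MonoDynCallInfo *info = mono_arch_dyn_call_prepare (sig);
        guint8 buf [sizeof (DynCallArgs)];

        if (!info)
                return; /* OP_DYN_CALL is not supported for this signature */
        mono_arch_start_dyn_call (info, args, ret_buf, buf, sizeof (buf));
        /* ... OP_DYN_CALL executes here, filling DynCallArgs::res/::fregs ... */
        mono_arch_finish_dyn_call (info, buf);
        mono_arch_dyn_call_free (info);
}
#endif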
2400
2401 /*
2402  * mono_arch_dyn_call_free:
2403  *
2404  *   Free a MonoDynCallInfo structure.
2405  */
2406 void
2407 mono_arch_dyn_call_free (MonoDynCallInfo *info)
2408 {
2409         ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
2410
2411         g_free (ainfo->cinfo);
2412         g_free (ainfo);
2413 }
2414
2415 #define PTR_TO_GREG(ptr) (mgreg_t)(ptr)
2416 #define GREG_TO_PTR(greg) (gpointer)(greg)
2417
2418 /*
2419  * mono_arch_start_dyn_call:
2420  *
2421  *   Convert the arguments ARGS to a format which can be passed to OP_DYN_CALL, and
2422  * store the result into BUF.
2423  * ARGS should be an array of pointers pointing to the arguments.
2424  * RET should point to a memory buffer large enough to hold the result of the
2425  * call.
2426  * This function should be as fast as possible, any work which does not depend
2427  * on the actual values of the arguments should be done in 
2428  * mono_arch_dyn_call_prepare ().
2429  * start_dyn_call + OP_DYN_CALL + finish_dyn_call is equivalent to ffi_call in
2430  * libffi.
2431  */
2432 void
2433 mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len)
2434 {
2435         ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
2436         DynCallArgs *p = (DynCallArgs*)buf;
2437         int arg_index, greg, freg, i, pindex;
2438         MonoMethodSignature *sig = dinfo->sig;
2439         int buffer_offset = 0;
2440
2441         g_assert (buf_len >= sizeof (DynCallArgs));
2442
2443         p->res = 0;
2444         p->ret = ret;
2445
2446         arg_index = 0;
2447         greg = 0;
2448         freg = 0;
2449         pindex = 0;
2450
2451         if (sig->hasthis || dinfo->cinfo->vret_arg_index == 1) {
2452                 p->regs [greg ++] = PTR_TO_GREG(*(args [arg_index ++]));
2453                 if (!sig->hasthis)
2454                         pindex = 1;
2455         }
2456
2457         if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg || dinfo->cinfo->ret.storage == ArgGsharedvtVariableInReg)
2458                 p->regs [greg ++] = PTR_TO_GREG(ret);
2459
2460         for (i = pindex; i < sig->param_count; i++) {
2461                 MonoType *t = mini_get_underlying_type (sig->params [i]);
2462                 gpointer *arg = args [arg_index ++];
2463
2464                 if (t->byref) {
2465                         p->regs [greg ++] = PTR_TO_GREG(*(arg));
2466                         continue;
2467                 }
2468
2469                 switch (t->type) {
2470                 case MONO_TYPE_STRING:
2471                 case MONO_TYPE_CLASS:  
2472                 case MONO_TYPE_ARRAY:
2473                 case MONO_TYPE_SZARRAY:
2474                 case MONO_TYPE_OBJECT:
2475                 case MONO_TYPE_PTR:
2476                 case MONO_TYPE_I:
2477                 case MONO_TYPE_U:
2478 #if !defined(__mono_ilp32__)
2479                 case MONO_TYPE_I8:
2480                 case MONO_TYPE_U8:
2481 #endif
2482                         g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]);
2483                         p->regs [greg ++] = PTR_TO_GREG(*(arg));
2484                         break;
2485 #if defined(__mono_ilp32__)
2486                 case MONO_TYPE_I8:
2487                 case MONO_TYPE_U8:
2488                         g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]);
2489                         p->regs [greg ++] = *(guint64*)(arg);
2490                         break;
2491 #endif
2492                 case MONO_TYPE_U1:
2493                         p->regs [greg ++] = *(guint8*)(arg);
2494                         break;
2495                 case MONO_TYPE_I1:
2496                         p->regs [greg ++] = *(gint8*)(arg);
2497                         break;
2498                 case MONO_TYPE_I2:
2499                         p->regs [greg ++] = *(gint16*)(arg);
2500                         break;
2501                 case MONO_TYPE_U2:
2502                         p->regs [greg ++] = *(guint16*)(arg);
2503                         break;
2504                 case MONO_TYPE_I4:
2505                         p->regs [greg ++] = *(gint32*)(arg);
2506                         break;
2507                 case MONO_TYPE_U4:
2508                         p->regs [greg ++] = *(guint32*)(arg);
2509                         break;
2510                 case MONO_TYPE_R4: {
2511                         double d;
2512
2513                         *(float*)&d = *(float*)(arg);
2514                         p->has_fp = 1;
2515                         p->fregs [freg ++] = d;
2516                         break;
2517                 }
2518                 case MONO_TYPE_R8:
2519                         p->has_fp = 1;
2520                         p->fregs [freg ++] = *(double*)(arg);
2521                         break;
2522                 case MONO_TYPE_GENERICINST:
2523                         if (MONO_TYPE_IS_REFERENCE (t)) {
2524                                 p->regs [greg ++] = PTR_TO_GREG(*(arg));
2525                                 break;
2526                         } else if (t->type == MONO_TYPE_GENERICINST && mono_class_is_nullable (mono_class_from_mono_type (t))) {
2527                                 MonoClass *klass = mono_class_from_mono_type (t);
2528                                 guint8 *nullable_buf;
2529                                 int size;
2530
2531                                 size = mono_class_value_size (klass, NULL);
2532                                 nullable_buf = p->buffer + buffer_offset;
2533                                 buffer_offset += size;
2534                                 g_assert (buffer_offset <= 256);
2535
2536                                 /* The argument pointed to by arg is either a boxed vtype or null */
2537                                 mono_nullable_init (nullable_buf, (MonoObject*)arg, klass);
2538
2539                                 arg = (gpointer*)nullable_buf;
2540                                 /* Fall through */
2541
2542                         } else {
2543                                 /* Fall through */
2544                         }
2545                 case MONO_TYPE_VALUETYPE: {
2546                         ArgInfo *ainfo = &dinfo->cinfo->args [i + sig->hasthis];
2547
2548                         g_assert (ainfo->storage == ArgValuetypeInReg);
2549                         if (ainfo->pair_storage [0] != ArgNone) {
2550                                 g_assert (ainfo->pair_storage [0] == ArgInIReg);
2551                                 p->regs [greg ++] = ((mgreg_t*)(arg))[0];
2552                         }
2553                         if (ainfo->pair_storage [1] != ArgNone) {
2554                                 g_assert (ainfo->pair_storage [1] == ArgInIReg);
2555                                 p->regs [greg ++] = ((mgreg_t*)(arg))[1];
2556                         }
2557                         break;
2558                 }
2559                 default:
2560                         g_assert_not_reached ();
2561                 }
2562         }
2563
2564         g_assert (greg <= PARAM_REGS);
2565 }
2566
2567 /*
2568  * mono_arch_finish_dyn_call:
2569  *
2570  *   Store the result of a dyn call into the return value buffer passed to
2571  * start_dyn_call ().
2572  * This function should be as fast as possible, any work which does not depend
2573  * on the actual values of the arguments should be done in 
2574  * mono_arch_dyn_call_prepare ().
2575  */
2576 void
2577 mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
2578 {
2579         ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
2580         MonoMethodSignature *sig = dinfo->sig;
2581         DynCallArgs *dargs = (DynCallArgs*)buf;
2582         guint8 *ret = dargs->ret;
2583         mgreg_t res = dargs->res;
2584         MonoType *sig_ret = mini_get_underlying_type (sig->ret);
2585
2586         switch (sig_ret->type) {
2587         case MONO_TYPE_VOID:
2588                 *(gpointer*)ret = NULL;
2589                 break;
2590         case MONO_TYPE_STRING:
2591         case MONO_TYPE_CLASS:  
2592         case MONO_TYPE_ARRAY:
2593         case MONO_TYPE_SZARRAY:
2594         case MONO_TYPE_OBJECT:
2595         case MONO_TYPE_I:
2596         case MONO_TYPE_U:
2597         case MONO_TYPE_PTR:
2598                 *(gpointer*)ret = GREG_TO_PTR(res);
2599                 break;
2600         case MONO_TYPE_I1:
2601                 *(gint8*)ret = res;
2602                 break;
2603         case MONO_TYPE_U1:
2604                 *(guint8*)ret = res;
2605                 break;
2606         case MONO_TYPE_I2:
2607                 *(gint16*)ret = res;
2608                 break;
2609         case MONO_TYPE_U2:
2610                 *(guint16*)ret = res;
2611                 break;
2612         case MONO_TYPE_I4:
2613                 *(gint32*)ret = res;
2614                 break;
2615         case MONO_TYPE_U4:
2616                 *(guint32*)ret = res;
2617                 break;
2618         case MONO_TYPE_I8:
2619                 *(gint64*)ret = res;
2620                 break;
2621         case MONO_TYPE_U8:
2622                 *(guint64*)ret = res;
2623                 break;
2624         case MONO_TYPE_R4:
2625                 *(float*)ret = *(float*)&(dargs->fregs [0]);
2626                 break;
2627         case MONO_TYPE_R8:
2628                 *(double*)ret = dargs->fregs [0];
2629                 break;
2630         case MONO_TYPE_GENERICINST:
2631                 if (MONO_TYPE_IS_REFERENCE (sig_ret)) {
2632                         *(gpointer*)ret = GREG_TO_PTR(res);
2633                         break;
2634                 } else {
2635                         /* Fall through */
2636                 }
2637         case MONO_TYPE_VALUETYPE:
2638                 if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg || dinfo->cinfo->ret.storage == ArgGsharedvtVariableInReg) {
2639                         /* Nothing to do */
2640                 } else {
2641                         ArgInfo *ainfo = &dinfo->cinfo->ret;
2642
2643                         g_assert (ainfo->storage == ArgValuetypeInReg);
2644
2645                         if (ainfo->pair_storage [0] != ArgNone) {
2646                                 g_assert (ainfo->pair_storage [0] == ArgInIReg);
2647                                 ((mgreg_t*)ret)[0] = res;
2648                         }
2649
2650                         g_assert (ainfo->pair_storage [1] == ArgNone);
2651                 }
2652                 break;
2653         default:
2654                 g_assert_not_reached ();
2655         }
2656 }
2657
2658 /* emit an exception if the condition fails */
2659 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
2660         do {                                                        \
2661                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
2662                 if (tins == NULL) {                                                                             \
2663                         mono_add_patch_info (cfg, code - cfg->native_code,   \
2664                                         MONO_PATCH_INFO_EXC, exc_name);  \
2665                         x86_branch32 (code, cond, 0, signed);               \
2666                 } else {        \
2667                         EMIT_COND_BRANCH (tins, cond, signed);  \
2668                 }                       \
2669         } while (0)
2670
2671 #define EMIT_FPCOMPARE(code) do { \
2672         amd64_fcompp (code); \
2673         amd64_fnstsw (code); \
2674 } while (0)
2675
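/*
 * EMIT_SSE2_FPFUNC bounces a double through the x87 stack: the SSE value is
 * spilled to the 8 bytes just below %rsp, processed with a legacy x87
 * instruction that has no SSE2 equivalent (fsin, fsqrt, ...), and then
 * loaded back into an XMM register.
 */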
2676 #define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
2677     amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
2678         amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
2679         amd64_ ##op (code); \
2680         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
2681         amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
2682 } while (0)
2683
2684 static guint8*
2685 emit_call_body (MonoCompile *cfg, guint8 *code, MonoJumpInfoType patch_type, gconstpointer data)
2686 {
2687         gboolean no_patch = FALSE;
2688
2689         /* 
2690          * FIXME: Add support for thunks
2691          */
2692         {
2693                 gboolean near_call = FALSE;
2694
2695                 /*
2696                  * Indirect calls are expensive so try to make a near call if possible.
2697                  * The caller memory is allocated by the code manager so it is 
2698                  * guaranteed to be at a 32 bit offset.
2699                  */
2700
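
                /*
                 * Illustrative encodings (assumed, not emitted verbatim here):
                 *   near: E8 rel32                       5-byte call, +/-2GB reach
                 *   far:  mov %r11, imm64; call *%r11    13 bytes in total
                 * The checks below try to prove the target fits in 32 bits so
                 * the short form can be used.
                 */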
2701                 if (patch_type != MONO_PATCH_INFO_ABS) {
2702                         /* The target is in memory allocated using the code manager */
2703                         near_call = TRUE;
2704
2705                         if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
2706                                 if (((MonoMethod*)data)->klass->image->aot_module)
2707                                         /* The callee might be an AOT method */
2708                                         near_call = FALSE;
2709                                 if (((MonoMethod*)data)->dynamic)
2710                                         /* The target is in malloc-ed memory */
2711                                         near_call = FALSE;
2712                         }
2713
2714                         if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
2715                                 /* 
2716                                  * The call might go directly to a native function without
2717                                  * the wrapper.
2718                                  */
2719                                 MonoJitICallInfo *mi = mono_find_jit_icall_by_name ((const char *)data);
2720                                 if (mi) {
2721                                         gconstpointer target = mono_icall_get_wrapper (mi);
2722                                         if ((((guint64)target) >> 32) != 0)
2723                                                 near_call = FALSE;
2724                                 }
2725                         }
2726                 }
2727                 else {
2728                         MonoJumpInfo *jinfo = NULL;
2729
2730                         if (cfg->abs_patches)
2731                                 jinfo = (MonoJumpInfo *)g_hash_table_lookup (cfg->abs_patches, data);
2732                         if (jinfo) {
2733                                 if (jinfo->type == MONO_PATCH_INFO_JIT_ICALL_ADDR) {
2734                                         MonoJitICallInfo *mi = mono_find_jit_icall_by_name (jinfo->data.name);
2735                                         if (mi && (((guint64)mi->func) >> 32) == 0)
2736                                                 near_call = TRUE;
2737                                         no_patch = TRUE;
2738                                 } else {
2739                                         /* 
2740                                          * This is not really an optimization, but required because the
2741                                          * generic class init trampolines use R11 to pass the vtable.
2742                                          */
2743                                         near_call = TRUE;
2744                                 }
2745                         } else {
2746                                 MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
2747                                 if (info) {
2748                                         if (info->func == info->wrapper) {
2749                                                 /* No wrapper */
2750                                                 if ((((guint64)info->func) >> 32) == 0)
2751                                                         near_call = TRUE;
2752                                         }
2753                                         else {
2754                                                 /* See the comment in mono_codegen () */
2755                                                 if ((info->name [0] != 'v') || (strstr (info->name, "ves_array_new_va_") == NULL && strstr (info->name, "ves_array_element_address_") == NULL))
2756                                                         near_call = TRUE;
2757                                         }
2758                                 }
2759                                 else if ((((guint64)data) >> 32) == 0) {
2760                                         near_call = TRUE;
2761                                         no_patch = TRUE;
2762                                 }
2763                         }
2764                 }
2765
2766                 if (cfg->method->dynamic)
2767                         /* These methods are allocated using malloc */
2768                         near_call = FALSE;
2769
2770 #ifdef MONO_ARCH_NOMAP32BIT
2771                 near_call = FALSE;
2772 #endif
2773                 /* The 64bit XEN kernel does not honour the MAP_32BIT flag. (#522894) */
2774                 if (optimize_for_xen)
2775                         near_call = FALSE;
2776
2777                 if (cfg->compile_aot) {
2778                         near_call = TRUE;
2779                         no_patch = TRUE;
2780                 }
2781
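		/*
		 * Schematically, the two forms emitted below are:
		 *   near: call rel32                      (5 bytes, patchable in place)
		 *   far:  mov $target, %r11; call *%r11   (through GP_SCRATCH_REG)
		 */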
2782                 if (near_call) {
2783                         /* 
2784                          * Align the call displacement to an address divisible by 4 so it does
2785                          * not span cache lines. This is required for code patching to work on SMP
2786                          * systems.
2787                          */
2788                         if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0) {
2789                                 guint32 pad_size = 4 - ((guint32)(code + 1 - cfg->native_code) % 4);
2790                                 amd64_padding (code, pad_size);
2791                         }
2792                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
2793                         amd64_call_code (code, 0);
2794                 }
2795                 else {
2796                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
2797                         amd64_set_reg_template (code, GP_SCRATCH_REG);
2798                         amd64_call_reg (code, GP_SCRATCH_REG);
2799                 }
2800         }
2801
2802         return code;
2803 }
2804
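/*
 * emit_call:
 *
 *   Emit a call, optionally reserving and releasing the 32 bytes of home
 * (shadow) space which the Windows x64 calling convention requires callers
 * to allocate.
 */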
2805 static inline guint8*
2806 emit_call (MonoCompile *cfg, guint8 *code, MonoJumpInfoType patch_type, gconstpointer data, gboolean win64_adjust_stack)
2807 {
2808 #ifdef TARGET_WIN32
2809         if (win64_adjust_stack)
2810                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 32);
2811 #endif
2812         code = emit_call_body (cfg, code, patch_type, data);
2813 #ifdef TARGET_WIN32
2814         if (win64_adjust_stack)
2815                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 32);
#endif

2818         return code;
2819 }
2820
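/*
 * store_membase_imm_to_store_membase_reg:
 *
 *   Map a STOREX_MEMBASE_IMM opcode to its STOREX_MEMBASE_REG counterpart,
 * or return -1 if there is none.
 */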
2821 static inline int
2822 store_membase_imm_to_store_membase_reg (int opcode)
2823 {
2824         switch (opcode) {
2825         case OP_STORE_MEMBASE_IMM:
2826                 return OP_STORE_MEMBASE_REG;
2827         case OP_STOREI4_MEMBASE_IMM:
2828                 return OP_STOREI4_MEMBASE_REG;
2829         case OP_STOREI8_MEMBASE_IMM:
2830                 return OP_STOREI8_MEMBASE_REG;
2831         }
2832
2833         return -1;
2834 }
2835
2836 #ifndef DISABLE_JIT
2837
2838 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB) || ((opcode) == OP_ISBB_IMM)))
2839
2840 /*
2841  * mono_arch_peephole_pass_1:
2842  *
2843  *   Perform peephole opts which should/can be performed before local regalloc
2844  */
2845 void
2846 mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
2847 {
2848         MonoInst *ins, *n;
2849
2850         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
2851                 MonoInst *last_ins = mono_inst_prev (ins, FILTER_IL_SEQ_POINT);
2852
2853                 switch (ins->opcode) {
2854                 case OP_ADD_IMM:
2855                 case OP_IADD_IMM:
2856                 case OP_LADD_IMM:
2857                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS) && (ins->inst_imm > 0)) {
2858                                 /* 
2859                                  * X86_LEA is like ADD, but doesn't have the
2860                                  * sreg1==dreg restriction. inst_imm > 0 is needed since LEA sign-extends 
2861                                  * its operand to 64 bit.
2862                                  */
2863                                 ins->opcode = OP_X86_LEA_MEMBASE;
2864                                 ins->inst_basereg = ins->sreg1;
2865                         }
2866                         break;
2867                 case OP_LXOR:
2868                 case OP_IXOR:
2869                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
2870                                 MonoInst *ins2;
2871
2872                                 /* 
2873                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
2874                                  * the latter has length 2-3 instead of 6 (reverse constant
2875                                  * propagation). These instruction sequences are very common
2876                                  * in the initlocals bblock.
2877                                  */
2878                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
2880                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
2881                                                 ins2->sreg1 = ins->dreg;
2882                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
2883                                                 /* Continue */
2884                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
2885                                                 NULLIFY_INS (ins2);
2886                                                 /* Continue */
2887                                         } else if (ins2->opcode == OP_IL_SEQ_POINT) {
2888                                                 /* Continue */
2889                                         } else {
2890                                                 break;
2891                                         }
2892                                 }
2893                         }
2894                         break;
2895                 case OP_COMPARE_IMM:
2896                 case OP_LCOMPARE_IMM:
2897                         /* OP_COMPARE_IMM (reg, 0) 
2898                          * --> 
2899                          * OP_AMD64_TEST_NULL (reg) 
2900                          */
2901                         if (!ins->inst_imm)
2902                                 ins->opcode = OP_AMD64_TEST_NULL;
2903                         break;
2904                 case OP_ICOMPARE_IMM:
2905                         if (!ins->inst_imm)
2906                                 ins->opcode = OP_X86_TEST_NULL;
2907                         break;
2908                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
2909                         /* 
2910                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
2911                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
2912                          * -->
2913                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
2914                          * OP_COMPARE_IMM reg, imm
2915                          *
2916                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
2917                          */
2918                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
2919                             ins->inst_basereg == last_ins->inst_destbasereg &&
2920                             ins->inst_offset == last_ins->inst_offset) {
2921                                         ins->opcode = OP_ICOMPARE_IMM;
2922                                         ins->sreg1 = last_ins->sreg1;
2923
2924                                         /* check if we can remove cmp reg,0 with test null */
2925                                         if (!ins->inst_imm)
2926                                                 ins->opcode = OP_X86_TEST_NULL;
2927                                 }
2928
2929                         break;
2930                 }
2931
2932                 mono_peephole_ins (bb, ins);
2933         }
2934 }
2935
2936 void
2937 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
2938 {
2939         MonoInst *ins, *n;
2940
2941         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
2942                 switch (ins->opcode) {
2943                 case OP_ICONST:
2944                 case OP_I8CONST: {
2945                         MonoInst *next = mono_inst_next (ins, FILTER_IL_SEQ_POINT);
2946                         /* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't always do it */
			if (ins->inst_c0 == 0 && (!next || INST_IGNORES_CFLAGS (next->opcode))) {
2949                                 ins->opcode = OP_LXOR;
2950                                 ins->sreg1 = ins->dreg;
2951                                 ins->sreg2 = ins->dreg;
2952                                 /* Fall through */
2953                         } else {
2954                                 break;
2955                         }
2956                 }
2957                 case OP_LXOR:
2958                         /*
2959                          * Use IXOR to avoid a rex prefix if possible. The cpu will sign extend the 
2960                          * 0 result into 64 bits.
2961                          */
2962                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
2963                                 ins->opcode = OP_IXOR;
2964                         }
2965                         /* Fall through */
2966                 case OP_IXOR:
2967                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
2968                                 MonoInst *ins2;
2969
2970                                 /* 
2971                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
2972                                  * the latter has length 2-3 instead of 6 (reverse constant
2973                                  * propagation). These instruction sequences are very common
2974                                  * in the initlocals bblock.
2975                                  */
2976                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
2978                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
2979                                                 ins2->sreg1 = ins->dreg;
2980                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG) || (ins2->opcode == OP_LIVERANGE_START) || (ins2->opcode == OP_GC_LIVENESS_DEF) || (ins2->opcode == OP_GC_LIVENESS_USE)) {
2981                                                 /* Continue */
2982                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
2983                                                 NULLIFY_INS (ins2);
2984                                                 /* Continue */
2985                                         } else if (ins2->opcode == OP_IL_SEQ_POINT) {
2986                                                 /* Continue */
2987                                         } else {
2988                                                 break;
2989                                         }
2990                                 }
2991                         }
2992                         break;
2993                 case OP_IADD_IMM:
2994                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
2995                                 ins->opcode = OP_X86_INC_REG;
2996                         break;
2997                 case OP_ISUB_IMM:
2998                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
2999                                 ins->opcode = OP_X86_DEC_REG;
3000                         break;
3001                 }
3002
3003                 mono_peephole_ins (bb, ins);
3004         }
3005 }
3006
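/*
 * NEW_INS: Allocate a new MonoInst DEST with opcode OP, copy the cil_code
 * of INS into it, and insert it before INS in the current basic block.
 */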
3007 #define NEW_INS(cfg,ins,dest,op) do {   \
3008                 MONO_INST_NEW ((cfg), (dest), (op)); \
3009         (dest)->cil_code = (ins)->cil_code; \
3010         mono_bblock_insert_before_ins (bb, ins, (dest)); \
3011         } while (0)
3012
3013 /*
3014  * mono_arch_lowering_pass:
3015  *
3016  *  Converts complex opcodes into simpler ones so that each IR instruction
3017  * corresponds to one machine instruction.
3018  */
3019 void
3020 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
3021 {
3022         MonoInst *ins, *n, *temp;
3023
3024         /*
3025          * FIXME: Need to add more instructions, but the current machine 
3026          * description can't model some parts of the composite instructions like
3027          * cdq.
3028          */
3029         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
3030                 switch (ins->opcode) {
3031                 case OP_DIV_IMM:
3032                 case OP_REM_IMM:
3033                 case OP_IDIV_IMM:
3034                 case OP_IDIV_UN_IMM:
3035                 case OP_IREM_UN_IMM:
3036                 case OP_LREM_IMM:
3037                 case OP_IREM_IMM:
3038                         mono_decompose_op_imm (cfg, bb, ins);
3039                         break;
3040                 case OP_COMPARE_IMM:
3041                 case OP_LCOMPARE_IMM:
3042                         if (!amd64_use_imm32 (ins->inst_imm)) {
3043                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
3044                                 temp->inst_c0 = ins->inst_imm;
3045                                 temp->dreg = mono_alloc_ireg (cfg);
3046                                 ins->opcode = OP_COMPARE;
3047                                 ins->sreg2 = temp->dreg;
3048                         }
3049                         break;
3050 #ifndef __mono_ilp32__
3051                 case OP_LOAD_MEMBASE:
3052 #endif
3053                 case OP_LOADI8_MEMBASE:
		/* Decompose large offsets into a separate constant load, since */
		/* membase addressing only accepts a 32 bit displacement */
3056                         if (!amd64_use_imm32 (ins->inst_offset)) {
3057                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
3058                                 temp->inst_c0 = ins->inst_offset;
3059                                 temp->dreg = mono_alloc_ireg (cfg);
3060                                 ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
3061                                 ins->inst_indexreg = temp->dreg;
3062                         }
3063                         break;
3064 #ifndef __mono_ilp32__
3065                 case OP_STORE_MEMBASE_IMM:
3066 #endif
3067                 case OP_STOREI8_MEMBASE_IMM:
3068                         if (!amd64_use_imm32 (ins->inst_imm)) {
3069                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
3070                                 temp->inst_c0 = ins->inst_imm;
3071                                 temp->dreg = mono_alloc_ireg (cfg);
3072                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
3073                                 ins->sreg1 = temp->dreg;
3074                         }
3075                         break;
3076 #ifdef MONO_ARCH_SIMD_INTRINSICS
3077                 case OP_EXPAND_I1: {
3078                                 int temp_reg1 = mono_alloc_ireg (cfg);
3079                                 int temp_reg2 = mono_alloc_ireg (cfg);
3080                                 int original_reg = ins->sreg1;
3081
3082                                 NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
3083                                 temp->sreg1 = original_reg;
3084                                 temp->dreg = temp_reg1;
3085
3086                                 NEW_INS (cfg, ins, temp, OP_SHL_IMM);
3087                                 temp->sreg1 = temp_reg1;
3088                                 temp->dreg = temp_reg2;
3089                                 temp->inst_imm = 8;
3090
3091                                 NEW_INS (cfg, ins, temp, OP_LOR);
3092                                 temp->sreg1 = temp->dreg = temp_reg2;
3093                                 temp->sreg2 = temp_reg1;
3094
3095                                 ins->opcode = OP_EXPAND_I2;
3096                                 ins->sreg1 = temp_reg2;
3097                         }
3098                         break;
3099 #endif
3100                 default:
3101                         break;
3102                 }
3103         }
3104
3105         bb->max_vreg = cfg->next_vreg;
3106 }
3107
3108 static const int 
3109 branch_cc_table [] = {
3110         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
3111         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
3112         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
3113 };
3114
3115 /* Maps CMP_... constants to X86_CC_... constants */
3116 static const int
3117 cc_table [] = {
3118         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
3119         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
3120 };
3121
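/* Whether the condition at the same index in cc_table is a signed comparison */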
3122 static const int
3123 cc_signed_table [] = {
3124         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
3125         FALSE, FALSE, FALSE, FALSE
3126 };
3127
3128 /*#include "cprop.c"*/
3129
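/*
 * emit_float_to_int:
 *
 *   Emit a truncating (round towards zero) conversion of the double in SREG
 * to a SIZE byte integer in DREG using cvttsd2si, widening the result for
 * the 1 and 2 byte cases.
 */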
3130 static unsigned char*
3131 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
3132 {
3133         if (size == 8)
3134                 amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
3135         else
3136                 amd64_sse_cvttsd2si_reg_reg_size (code, dreg, sreg, 4);
3137
3138         if (size == 1)
3139                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
3140         else if (size == 2)
3141                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
3142         return code;
3143 }
3144
3145 static unsigned char*
3146 mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree)
3147 {
3148         int sreg = tree->sreg1;
3149         int need_touch = FALSE;
3150
3151 #if defined(TARGET_WIN32)
3152         need_touch = TRUE;
3153 #elif defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	if (!(tree->flags & MONO_INST_INIT))
3155                 need_touch = TRUE;
3156 #endif
3157
3158         if (need_touch) {
3159                 guint8* br[5];
3160
3161                 /*
3162                  * Under Windows:
3163                  * If requested stack size is larger than one page,
3164                  * perform stack-touch operation
3165                  */
3166                 /*
3167                  * Generate stack probe code.
3168                  * Under Windows, it is necessary to allocate one page at a time,
3169                  * "touching" stack after each successful sub-allocation. This is
3170                  * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
3172                  * Stack normally grows sequentially so OS traps access to the
3173                  * guard page and commits more pages when needed.
3174                  */
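		/*
		 * Roughly, the emitted probe sequence is:
		 *
		 *         test sreg, ~0xfff ; jz .small
		 * .loop:  sub rsp, 0x1000 ; test [rsp], rsp    <- touch the new page
		 *         sub sreg, 0x1000 ; cmp sreg, 0x1000 ; jae .loop
		 *         test sreg, sreg ; jz .done
		 *         sub rsp, sreg ; jmp .done
		 * .small: sub rsp, sreg
		 * .done:
		 */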
3175                 amd64_test_reg_imm (code, sreg, ~0xFFF);
3176                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
3177
3178                 br[2] = code; /* loop */
3179                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
3180                 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
3181                 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
3182                 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
3183                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
3184                 amd64_patch (br[3], br[2]);
3185                 amd64_test_reg_reg (code, sreg, sreg);
3186                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
3187                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
3188
3189                 br[1] = code; x86_jump8 (code, 0);
3190
3191                 amd64_patch (br[0], code);
3192                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
3193                 amd64_patch (br[1], code);
3194                 amd64_patch (br[4], code);
3195         }
3196         else
3197                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
3198
3199         if (tree->flags & MONO_INST_INIT) {
3200                 int offset = 0;
3201                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
3202                         amd64_push_reg (code, AMD64_RAX);
3203                         offset += 8;
3204                 }
3205                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
3206                         amd64_push_reg (code, AMD64_RCX);
3207                         offset += 8;
3208                 }
3209                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
3210                         amd64_push_reg (code, AMD64_RDI);
3211                         offset += 8;
3212                 }
3213                 
3214                 amd64_shift_reg_imm (code, X86_SHR, sreg, 3);
3215                 if (sreg != AMD64_RCX)
3216                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
3217                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3218                                 
3219                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
3220                 if (cfg->param_area)
3221                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RDI, cfg->param_area);
3222                 amd64_cld (code);
3223                 amd64_prefix (code, X86_REP_PREFIX);
3224                 amd64_stosl (code);
3225                 
3226                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
3227                         amd64_pop_reg (code, AMD64_RDI);
3228                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
3229                         amd64_pop_reg (code, AMD64_RCX);
3230                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
3231                         amd64_pop_reg (code, AMD64_RAX);
3232         }
3233         return code;
3234 }
3235
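/*
 * emit_move_return_value:
 *
 *   Move the return value of the call INS from its fixed ABI location (RAX,
 * XMM0, or a register pair for small structs) into ins->dreg or into the
 * vret buffer.
 */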
3236 static guint8*
3237 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
3238 {
3239         CallInfo *cinfo;
3240         guint32 quad;
3241
3242         /* Move return value to the target register */
3243         /* FIXME: do this in the local reg allocator */
3244         switch (ins->opcode) {
3245         case OP_CALL:
3246         case OP_CALL_REG:
3247         case OP_CALL_MEMBASE:
3248         case OP_LCALL:
3249         case OP_LCALL_REG:
3250         case OP_LCALL_MEMBASE:
3251                 g_assert (ins->dreg == AMD64_RAX);
3252                 break;
3253         case OP_FCALL:
3254         case OP_FCALL_REG:
3255         case OP_FCALL_MEMBASE: {
3256                 MonoType *rtype = mini_get_underlying_type (((MonoCallInst*)ins)->signature->ret);
3257                 if (rtype->type == MONO_TYPE_R4) {
3258                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
3259                 }
3260                 else {
3261                         if (ins->dreg != AMD64_XMM0)
3262                                 amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
3263                 }
3264                 break;
3265         }
3266         case OP_RCALL:
3267         case OP_RCALL_REG:
3268         case OP_RCALL_MEMBASE:
3269                 if (ins->dreg != AMD64_XMM0)
3270                         amd64_sse_movss_reg_reg (code, ins->dreg, AMD64_XMM0);
3271                 break;
3272         case OP_VCALL:
3273         case OP_VCALL_REG:
3274         case OP_VCALL_MEMBASE:
3275         case OP_VCALL2:
3276         case OP_VCALL2_REG:
3277         case OP_VCALL2_MEMBASE:
3278                 cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature);
3279                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3280                         MonoInst *loc = (MonoInst *)cfg->arch.vret_addr_loc;
3281
3282                         /* Load the destination address */
3283                         g_assert (loc->opcode == OP_REGOFFSET);
3284                         amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, sizeof(gpointer));
3285
3286                         for (quad = 0; quad < 2; quad ++) {
3287                                 switch (cinfo->ret.pair_storage [quad]) {
3288                                 case ArgInIReg:
3289                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * sizeof(mgreg_t)), cinfo->ret.pair_regs [quad], sizeof(mgreg_t));
3290                                         break;
3291                                 case ArgInFloatSSEReg:
3292                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
3293                                         break;
3294                                 case ArgInDoubleSSEReg:
3295                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
3296                                         break;
3297                                 case ArgNone:
3298                                         break;
3299                                 default:
3300                                         NOT_IMPLEMENTED;
3301                                 }
3302                         }
3303                 }
3304                 break;
3305         }
3306
3307         return code;
3308 }
3309
3310 #endif /* DISABLE_JIT */
3311
3312 #ifdef __APPLE__
3313 static int tls_gs_offset;
3314 #endif
3315
3316 gboolean
3317 mono_amd64_have_tls_get (void)
3318 {
3319 #ifdef TARGET_MACH
3320         static gboolean have_tls_get = FALSE;
3321         static gboolean inited = FALSE;
3322
3323         if (inited)
3324                 return have_tls_get;
3325
3326 #if MONO_HAVE_FAST_TLS
3327         guint8 *ins = (guint8*)pthread_getspecific;
3328
3329         /*
3330          * We're looking for these two instructions:
3331          *
3332          * mov    %gs:[offset](,%rdi,8),%rax
3333          * retq
3334          */
3335         have_tls_get = ins [0] == 0x65 &&
3336                        ins [1] == 0x48 &&
3337                        ins [2] == 0x8b &&
3338                        ins [3] == 0x04 &&
3339                        ins [4] == 0xfd &&
3340                        ins [6] == 0x00 &&
3341                        ins [7] == 0x00 &&
3342                        ins [8] == 0x00 &&
3343                        ins [9] == 0xc3;
3344
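	/*
	 * The displacement is a 32 bit little-endian immediate at ins [5..8];
	 * the check above requires its three high bytes to be zero, so the low
	 * byte is the whole offset.
	 */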
3345         tls_gs_offset = ins[5];
3346
3347         /*
3348          * Apple now loads a different version of pthread_getspecific when launched from Xcode
3349          * For that version we're looking for these instructions:
3350          *
3351          * pushq  %rbp
3352          * movq   %rsp, %rbp
3353          * mov    %gs:[offset](,%rdi,8),%rax
3354          * popq   %rbp
3355          * retq
3356          */
3357         if (!have_tls_get) {
3358                 have_tls_get = ins [0] == 0x55 &&
3359                                ins [1] == 0x48 &&
3360                                ins [2] == 0x89 &&
3361                                ins [3] == 0xe5 &&
3362                                ins [4] == 0x65 &&
3363                                ins [5] == 0x48 &&
3364                                ins [6] == 0x8b &&
3365                                ins [7] == 0x04 &&
3366                                ins [8] == 0xfd &&
3367                                ins [10] == 0x00 &&
3368                                ins [11] == 0x00 &&
3369                                ins [12] == 0x00 &&
3370                                ins [13] == 0x5d &&
3371                                ins [14] == 0xc3;
3372
3373                 tls_gs_offset = ins[9];
3374         }
3375 #endif
3376
3377         inited = TRUE;
3378
3379         return have_tls_get;
3380 #elif defined(TARGET_ANDROID)
3381         return FALSE;
3382 #else
3383         return TRUE;
3384 #endif
3385 }
3386
3387 int
3388 mono_amd64_get_tls_gs_offset (void)
3389 {
3390 #ifdef TARGET_OSX
3391         return tls_gs_offset;
3392 #else
3393         g_assert_not_reached ();
3394         return -1;
3395 #endif
3396 }
3397
3398 /*
3399  * mono_amd64_emit_tls_get:
3400  * @code: buffer to store code to
3401  * @dreg: hard register where to place the result
3402  * @tls_offset: offset info
3403  *
3404  * mono_amd64_emit_tls_get emits in @code the native code that puts in
3405  * the dreg register the item in the thread local storage identified
3406  * by tls_offset.
3407  *
3408  * Returns: a pointer to the end of the stored code
3409  */
3410 guint8*
3411 mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
3412 {
3413 #ifdef TARGET_WIN32
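	/*
	 * The first 64 slots live in the TEB itself (TlsSlots, offset 0x1480 in
	 * the x64 TEB); higher slots are reached through the TlsExpansionSlots
	 * pointer at offset 0x1780, where (tls_offset * 8) - 0x200 ==
	 * (tls_offset - 64) * 8.
	 */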
3414         if (tls_offset < 64) {
3415                 x86_prefix (code, X86_GS_PREFIX);
3416                 amd64_mov_reg_mem (code, dreg, (tls_offset * 8) + 0x1480, 8);
3417         } else {
3418                 guint8 *buf [16];
3419
3420                 g_assert (tls_offset < 0x440);
3421                 /* Load TEB->TlsExpansionSlots */
3422                 x86_prefix (code, X86_GS_PREFIX);
3423                 amd64_mov_reg_mem (code, dreg, 0x1780, 8);
3424                 amd64_test_reg_reg (code, dreg, dreg);
3425                 buf [0] = code;
3426                 amd64_branch (code, X86_CC_EQ, code, TRUE);
3427                 amd64_mov_reg_membase (code, dreg, dreg, (tls_offset * 8) - 0x200, 8);
3428                 amd64_patch (buf [0], code);
3429         }
3430 #elif defined(__APPLE__)
3431         x86_prefix (code, X86_GS_PREFIX);
3432         amd64_mov_reg_mem (code, dreg, tls_gs_offset + (tls_offset * 8), 8);
3433 #else
3434         if (optimize_for_xen) {
3435                 x86_prefix (code, X86_FS_PREFIX);
3436                 amd64_mov_reg_mem (code, dreg, 0, 8);
3437                 amd64_mov_reg_membase (code, dreg, dreg, tls_offset, 8);
3438         } else {
3439                 x86_prefix (code, X86_FS_PREFIX);
3440                 amd64_mov_reg_mem (code, dreg, tls_offset, 8);
3441         }
3442 #endif
3443         return code;
3444 }
3445
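/*
 * emit_tls_get_reg:
 *
 *   Like mono_amd64_emit_tls_get, but with the (translated) TLS offset in
 * OFFSET_REG instead of an immediate.
 */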
3446 static guint8*
3447 emit_tls_get_reg (guint8* code, int dreg, int offset_reg)
3448 {
3449         /* offset_reg contains a value translated by mono_arch_translate_tls_offset () */
3450 #ifdef TARGET_OSX
3451         if (dreg != offset_reg)
3452                 amd64_mov_reg_reg (code, dreg, offset_reg, sizeof (mgreg_t));
3453         amd64_prefix (code, X86_GS_PREFIX);
3454         amd64_mov_reg_membase (code, dreg, dreg, 0, sizeof (mgreg_t));
3455 #elif defined(__linux__)
3456         int tmpreg = -1;
3457
3458         if (dreg == offset_reg) {
3459                 /* Use a temporary reg by saving it to the redzone */
3460                 tmpreg = dreg == AMD64_RAX ? AMD64_RCX : AMD64_RAX;
3461                 amd64_mov_membase_reg (code, AMD64_RSP, -8, tmpreg, 8);
3462                 amd64_mov_reg_reg (code, tmpreg, offset_reg, sizeof (gpointer));
3463                 offset_reg = tmpreg;
3464         }
3465         x86_prefix (code, X86_FS_PREFIX);
3466         amd64_mov_reg_mem (code, dreg, 0, 8);
3467         amd64_mov_reg_memindex (code, dreg, dreg, 0, offset_reg, 0, 8);
3468         if (tmpreg != -1)
3469                 amd64_mov_reg_membase (code, tmpreg, AMD64_RSP, -8, 8);
3470 #else
3471         g_assert_not_reached ();
3472 #endif
3473         return code;
3474 }
3475
3476 static guint8*
3477 amd64_emit_tls_set (guint8 *code, int sreg, int tls_offset)
3478 {
3479 #ifdef TARGET_WIN32
3480         g_assert_not_reached ();
3481 #elif defined(__APPLE__)
3482         x86_prefix (code, X86_GS_PREFIX);
3483         amd64_mov_mem_reg (code, tls_gs_offset + (tls_offset * 8), sreg, 8);
3484 #else
3485         g_assert (!optimize_for_xen);
3486         x86_prefix (code, X86_FS_PREFIX);
3487         amd64_mov_mem_reg (code, tls_offset, sreg, 8);
3488 #endif
3489         return code;
3490 }
3491
3492 static guint8*
3493 amd64_emit_tls_set_reg (guint8 *code, int sreg, int offset_reg)
3494 {
3495         /* offset_reg contains a value translated by mono_arch_translate_tls_offset () */
3496 #ifdef TARGET_WIN32
3497         g_assert_not_reached ();
3498 #elif defined(__APPLE__)
3499         x86_prefix (code, X86_GS_PREFIX);
3500         amd64_mov_membase_reg (code, offset_reg, 0, sreg, 8);
3501 #else
3502         x86_prefix (code, X86_FS_PREFIX);
3503         amd64_mov_membase_reg (code, offset_reg, 0, sreg, 8);
3504 #endif
3505         return code;
3506 }

/*
 * mono_arch_translate_tls_offset:
 *
 *   Translate the TLS offset OFFSET computed by MONO_THREAD_VAR_OFFSET () into a format usable by OP_TLS_GET_REG/OP_TLS_SET_REG.
 */
3513 int
3514 mono_arch_translate_tls_offset (int offset)
3515 {
3516 #ifdef __APPLE__
3517         return tls_gs_offset + (offset * 8);
3518 #else
3519         return offset;
3520 #endif
3521 }
3522
3523 /*
3524  * emit_setup_lmf:
3525  *
3526  *   Emit code to initialize an LMF structure at LMF_OFFSET.
3527  */
3528 static guint8*
3529 emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset, int cfa_offset)
3530 {
3531         /* 
3532          * The ip field is not set, the exception handling code will obtain it from the stack location pointed to by the sp field.
3533          */
3534         /* 
3535          * sp is saved right before calls but we need to save it here too so
3536          * async stack walks would work.
3537          */
3538         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
3539         /* Save rbp */
3540         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), AMD64_RBP, 8);
3541         if (cfg->arch.omit_fp && cfa_offset != -1)
3542                 mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - (cfa_offset - (lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp))));
3543
3544         /* These can't contain refs */
3545         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, previous_lmf), SLOT_NOREF);
3546         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rip), SLOT_NOREF);
3547         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), SLOT_NOREF);
3548         /* These are handled automatically by the stack marking code */
3549         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), SLOT_NOREF);
3550
3551         return code;
3552 }
3553
3554 /* benchmark and set based on cpu */
3555 #define LOOP_ALIGNMENT 8
3556 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
3557
3558 #ifndef DISABLE_JIT
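/*
 * mono_arch_output_basic_block:
 *
 *   Emit the native code for the instructions in BB, growing the code buffer
 * as needed.
 */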
3559 void
3560 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3561 {
3562         MonoInst *ins;
3563         MonoCallInst *call;
3564         guint offset;
3565         guint8 *code = cfg->native_code + cfg->code_len;
3566         int max_len;
3567
3568         /* Fix max_offset estimate for each successor bb */
3569         if (cfg->opt & MONO_OPT_BRANCH) {
3570                 int current_offset = cfg->code_len;
3571                 MonoBasicBlock *current_bb;
3572                 for (current_bb = bb; current_bb != NULL; current_bb = current_bb->next_bb) {
3573                         current_bb->max_offset = current_offset;
3574                         current_offset += current_bb->max_length;
3575                 }
3576         }
3577
3578         if (cfg->opt & MONO_OPT_LOOP) {
3579                 int pad, align = LOOP_ALIGNMENT;
3580                 /* set alignment depending on cpu */
3581                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
3582                         pad = align - pad;
3583                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
3584                         amd64_padding (code, pad);
3585                         cfg->code_len += pad;
3586                         bb->native_offset = cfg->code_len;
3587                 }
3588         }
3589
3590         if (cfg->verbose_level > 2)
3591                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
3592
3593         if ((cfg->prof_options & MONO_PROFILE_COVERAGE) && cfg->coverage_info) {
3594                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
3595                 g_assert (!cfg->compile_aot);
3596
3597                 cov->data [bb->dfn].cil_code = bb->cil_code;
3598                 amd64_mov_reg_imm (code, AMD64_R11, (guint64)&cov->data [bb->dfn].count);
		/* this is not thread safe, but it is good enough */
3600                 amd64_inc_membase (code, AMD64_R11, 0);
3601         }
3602
3603         offset = code - cfg->native_code;
3604
3605         mono_debug_open_block (cfg, bb, offset);
3606
	if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
3608                 x86_breakpoint (code);
3609
3610         MONO_BB_FOR_EACH_INS (bb, ins) {
3611                 offset = code - cfg->native_code;
3612
3613                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3614
3615 #define EXTRA_CODE_SPACE (16)
3616
3617                 if (G_UNLIKELY (offset > (cfg->code_size - max_len - EXTRA_CODE_SPACE))) {
3618                         cfg->code_size *= 2;
3619                         cfg->native_code = (unsigned char *)mono_realloc_native_code(cfg);
3620                         code = cfg->native_code + offset;
3621                         cfg->stat_code_reallocs++;
3622                 }
3623
3624                 if (cfg->debug_info)
3625                         mono_debug_record_line_number (cfg, ins, offset);
3626
3627                 switch (ins->opcode) {
3628                 case OP_BIGMUL:
3629                         amd64_mul_reg (code, ins->sreg2, TRUE);
3630                         break;
3631                 case OP_BIGMUL_UN:
3632                         amd64_mul_reg (code, ins->sreg2, FALSE);
3633                         break;
3634                 case OP_X86_SETEQ_MEMBASE:
3635                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
3636                         break;
3637                 case OP_STOREI1_MEMBASE_IMM:
3638                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
3639                         break;
3640                 case OP_STOREI2_MEMBASE_IMM:
3641                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
3642                         break;
3643                 case OP_STOREI4_MEMBASE_IMM:
3644                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
3645                         break;
3646                 case OP_STOREI1_MEMBASE_REG:
3647                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
3648                         break;
3649                 case OP_STOREI2_MEMBASE_REG:
3650                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
3651                         break;
3654                 case OP_STORE_MEMBASE_REG:
3655                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, sizeof(gpointer));
3656                         break;
3657                 case OP_STOREI8_MEMBASE_REG:
3658                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
3659                         break;
3660                 case OP_STOREI4_MEMBASE_REG:
3661                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
3662                         break;
3663                 case OP_STORE_MEMBASE_IMM:
			/* inst_imm must fit in an imm32; the cast makes the sign extension explicit */
3669                         g_assert (amd64_is_imm32 (ins->inst_imm));
3670                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, (gint32)ins->inst_imm, sizeof(gpointer));
3671                         break;
3672                 case OP_STOREI8_MEMBASE_IMM:
3673                         g_assert (amd64_is_imm32 (ins->inst_imm));
3674                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
3675                         break;
3676                 case OP_LOAD_MEM:
3677 #ifdef __mono_ilp32__
3678                         /* In ILP32, pointers are 4 bytes, so separate these */
3679                         /* cases, use literal 8 below where we really want 8 */
3680                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3681                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, sizeof(gpointer));
3682                         break;
3683 #endif
3684                 case OP_LOADI8_MEM:
3685                         // FIXME: Decompose this earlier
3686                         if (amd64_use_imm32 (ins->inst_imm))
3687                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 8);
3688                         else {
3689                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_imm, sizeof(gpointer));
3690                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8);
3691                         }
3692                         break;
3693                 case OP_LOADI4_MEM:
3694                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3695                         amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0);
3696                         break;
3697                 case OP_LOADU4_MEM:
3698                         // FIXME: Decompose this earlier
3699                         if (amd64_use_imm32 (ins->inst_imm))
3700                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
3701                         else {
3702                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_imm, sizeof(gpointer));
3703                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
3704                         }
3705                         break;
3706                 case OP_LOADU1_MEM:
3707                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3708                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE);
3709                         break;
3710                 case OP_LOADU2_MEM:
3713                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3714                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE);
3715                         break;
3716                 case OP_LOAD_MEMBASE:
3717                         g_assert (amd64_is_imm32 (ins->inst_offset));
3718                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof(gpointer));
3719                         break;
3720                 case OP_LOADI8_MEMBASE:
3721                         /* Use literal 8 instead of sizeof pointer or */
3722                         /* register, we really want 8 for this opcode */
3723                         g_assert (amd64_is_imm32 (ins->inst_offset));
3724                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 8);
3725                         break;
3726                 case OP_LOADI4_MEMBASE:
3727                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
3728                         break;
3729                 case OP_LOADU4_MEMBASE:
3730                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
3731                         break;
3732                 case OP_LOADU1_MEMBASE:
3733                         /* The cpu zero extends the result into 64 bits */
3734                         amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE, 4);
3735                         break;
3736                 case OP_LOADI1_MEMBASE:
3737                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
3738                         break;
3739                 case OP_LOADU2_MEMBASE:
3740                         /* The cpu zero extends the result into 64 bits */
3741                         amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE, 4);
3742                         break;
3743                 case OP_LOADI2_MEMBASE:
3744                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
3745                         break;
3746                 case OP_AMD64_LOADI8_MEMINDEX:
3747                         amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
3748                         break;
3749                 case OP_LCONV_TO_I1:
3750                 case OP_ICONV_TO_I1:
3751                 case OP_SEXT_I1:
3752                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3753                         break;
3754                 case OP_LCONV_TO_I2:
3755                 case OP_ICONV_TO_I2:
3756                 case OP_SEXT_I2:
3757                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3758                         break;
3759                 case OP_LCONV_TO_U1:
3760                 case OP_ICONV_TO_U1:
3761                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
3762                         break;
3763                 case OP_LCONV_TO_U2:
3764                 case OP_ICONV_TO_U2:
3765                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
3766                         break;
3767                 case OP_ZEXT_I4:
3768                         /* Clean out the upper word */
3769                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
3770                         break;
3771                 case OP_SEXT_I4:
3772                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
3773                         break;
3774                 case OP_COMPARE:
3775                 case OP_LCOMPARE:
3776                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3777                         break;
3778                 case OP_COMPARE_IMM:
3779 #if defined(__mono_ilp32__)
3780                         /* Comparison of pointer immediates should be 4 bytes to avoid sign-extend problems */
3781                         g_assert (amd64_is_imm32 (ins->inst_imm));
3782                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
3783                         break;
3784 #endif
3785                 case OP_LCOMPARE_IMM:
3786                         g_assert (amd64_is_imm32 (ins->inst_imm));
3787                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
3788                         break;
3789                 case OP_X86_COMPARE_REG_MEMBASE:
3790                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
3791                         break;
3792                 case OP_X86_TEST_NULL:
3793                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
3794                         break;
3795                 case OP_AMD64_TEST_NULL:
3796                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
3797                         break;
3798
3799                 case OP_X86_ADD_REG_MEMBASE:
3800                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3801                         break;
3802                 case OP_X86_SUB_REG_MEMBASE:
3803                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3804                         break;
3805                 case OP_X86_AND_REG_MEMBASE:
3806                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3807                         break;
3808                 case OP_X86_OR_REG_MEMBASE:
3809                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3810                         break;
3811                 case OP_X86_XOR_REG_MEMBASE:
3812                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
3813                         break;
3814
		case OP_X86_ADD_MEMBASE_IMM:
			/* FIXME: Make a 64 version too */
			amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_SUB_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_AND_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_OR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_XOR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_ADD_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_SUB_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_AND_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_OR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_XOR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_INC_MEMBASE:
			amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_X86_INC_REG:
			amd64_inc_reg_size (code, ins->dreg, 4);
			break;
		case OP_X86_DEC_MEMBASE:
			amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_X86_DEC_REG:
			amd64_dec_reg_size (code, ins->dreg, 4);
			break;
		case OP_X86_MUL_REG_MEMBASE:
		case OP_X86_MUL_MEMBASE_REG:
			amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_IMM:
			amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_AMD64_COMPARE_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_COMPARE_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_X86_COMPARE_MEMBASE8_IMM:
			amd64_alu_membase8_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_AMD64_ICOMPARE_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_AMD64_COMPARE_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;

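		/* 64-bit (REX.W-prefixed) variants of the ALU/compare opcodes above. */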
		case OP_AMD64_ADD_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_SUB_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_AND_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_OR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_XOR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;

		case OP_AMD64_ADD_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_SUB_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_AND_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_OR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_XOR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;

		case OP_AMD64_ADD_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_SUB_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_AND_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_OR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_XOR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;

		case OP_BREAK:
			amd64_breakpoint (code);
			break;
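		/*
		 * "rep; nop" encodes the PAUSE hint, so OP_RELAXED_NOP is a
		 * spin-wait-friendly no-op, while OP_HARD_NOP emits a plain nop.
		 */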
		case OP_RELAXED_NOP:
			x86_prefix (code, X86_REP_PREFIX);
			x86_nop (code);
			break;
		case OP_HARD_NOP:
			x86_nop (code);
			break;
		case OP_NOP:
		case OP_DUMMY_USE:
		case OP_DUMMY_STORE:
		case OP_DUMMY_ICONST:
		case OP_DUMMY_R8CONST:
		case OP_NOT_REACHED:
		case OP_NOT_NULL:
			break;
		case OP_IL_SEQ_POINT:
			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
			break;
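		/*
		 * The single-step fast path emitted below is roughly:
		 *
		 *     mov  r11, [fp + ss_tramp_var]   ; r11 = &ss_trampoline
		 *     mov  r11, [r11]                 ; r11 = ss_trampoline
		 *     test r11, r11                   ; non-null only while single stepping
		 *     jz   skip
		 *     call r11
		 *   skip:
		 */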
		case OP_SEQ_POINT: {
			if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
				MonoInst *var = (MonoInst *)cfg->arch.ss_tramp_var;
				guint8 *label;

				/* Load ss_tramp_var */
				/* This is equal to &ss_trampoline */
				amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
				/* Load the trampoline address */
				amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8);
				/* Call it if it is non-null */
				amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
				label = code;
				amd64_branch8 (code, X86_CC_Z, 0, FALSE);
				amd64_call_reg (code, AMD64_R11);
				amd64_patch (label, code);
			}

			/* This is the address which is saved in seq points. */
			mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);

			if (cfg->compile_aot) {
				guint32 offset = code - cfg->native_code;
				guint32 val;
				MonoInst *info_var = (MonoInst *)cfg->arch.seq_point_info_var;
				guint8 *label;

				/* Load info var */
				amd64_mov_reg_membase (code, AMD64_R11, info_var->inst_basereg, info_var->inst_offset, 8);
				val = ((offset) * sizeof (guint8*)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
				/* Load the info->bp_addrs [offset], which is either NULL or the address of the breakpoint trampoline */
				amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, val, 8);
				amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
				label = code;
				amd64_branch8 (code, X86_CC_Z, 0, FALSE);
				/* Call the trampoline */
				amd64_call_reg (code, AMD64_R11);
				amd64_patch (label, code);
			} else {
				MonoInst *var = (MonoInst *)cfg->arch.bp_tramp_var;
				guint8 *label;

				/*
				 * Emit a test+branch against a constant, the constant will be overwritten
				 * by mono_arch_set_breakpoint () to cause the test to fail.
				 */
				amd64_mov_reg_imm (code, AMD64_R11, 0);
				amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
				label = code;
				amd64_branch8 (code, X86_CC_Z, 0, FALSE);

				g_assert (var);
				g_assert (var->opcode == OP_REGOFFSET);
				/* Load bp_tramp_var */
				/* This is equal to &bp_trampoline */
				amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
				/* Call the trampoline */
				amd64_call_membase (code, AMD64_R11, 0);
				amd64_patch (label, code);
			}
			/*
			 * Add an additional nop so skipping the bp doesn't cause the ip to point
			 * to another IL offset.
			 */
			x86_nop (code);
			break;
		}
		case OP_ADDCC:
		case OP_LADDCC:
		case OP_LADD:
			amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_ADC:
			amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
			break;
		case OP_ADD_IMM:
		case OP_LADD_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
			break;
		case OP_ADC_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
			break;
		case OP_SUBCC:
		case OP_LSUBCC:
		case OP_LSUB:
			amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_SBB:
			amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
			break;
		case OP_SUB_IMM:
		case OP_LSUB_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
			break;
		case OP_SBB_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
			break;
		case OP_LAND:
			amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_AND_IMM:
		case OP_LAND_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
			break;
		case OP_LMUL:
			amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MUL_IMM:
		case OP_LMUL_IMM:
		case OP_IMUL_IMM: {
			guint32 size = (ins->opcode == OP_IMUL_IMM) ? 4 : 8;

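			/*
			 * Strength-reduce multiplication by small constants: LEA with a
			 * scaled index computes x*3, x*5 and x*9 in a single instruction
			 * without touching the flags, and combinations (e.g. x*10 as
			 * (x*5)*2) cover the other common factors before falling back
			 * to a plain IMUL with an immediate.
			 */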
			switch (ins->inst_imm) {
			case 2:
				/* MOV r1, r2 */
				/* ADD r1, r1 */
				if (ins->dreg != ins->sreg1)
					amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, size);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 3:
				/* LEA r1, [r2 + r2*2] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				break;
			case 5:
				/* LEA r1, [r2 + r2*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				break;
			case 6:
				/* LEA r1, [r2 + r2*2] */
				/* ADD r1, r1          */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 9:
				/* LEA r1, [r2 + r2*8] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
				break;
			case 10:
				/* LEA r1, [r2 + r2*4] */
				/* ADD r1, r1          */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 12:
				/* LEA r1, [r2 + r2*2] */
				/* SHL r1, 2           */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				break;
			case 25:
				/* LEA r1, [r2 + r2*4] */
				/* LEA r1, [r1 + r1*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			case 100:
				/* LEA r1, [r2 + r2*4] */
				/* SHL r1, 2           */
				/* LEA r1, [r1 + r1*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			default:
				amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, size);
				break;
			}
			break;
		}
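		/*
		 * idiv/div implicitly use RDX:RAX, and cdq/cqo sign-extends RAX into
		 * RDX, so a divisor that happens to live in RDX would be clobbered;
		 * it is therefore spilled just below RSP first. On SysV targets the
		 * 128-byte red zone below RSP makes the store at [rsp - 8] safe
		 * without adjusting the stack pointer.
		 */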
		case OP_LDIV:
		case OP_LREM:
			/* Regalloc magic makes the div/rem cases the same */
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_cdq (code);
				amd64_div_membase (code, AMD64_RSP, -8, TRUE);
			} else {
				amd64_cdq (code);
				amd64_div_reg (code, ins->sreg2, TRUE);
			}
			break;
		case OP_LDIV_UN:
		case OP_LREM_UN:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_membase (code, AMD64_RSP, -8, FALSE);
			} else {
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_reg (code, ins->sreg2, FALSE);
			}
			break;
		case OP_IDIV:
		case OP_IREM:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_cdq_size (code, 4);
				amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
			} else {
				amd64_cdq_size (code, 4);
				amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
			}
			break;
		case OP_IDIV_UN:
		case OP_IREM_UN:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
			} else {
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
			}
			break;
		case OP_LMUL_OVF:
			amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		case OP_LOR:
			amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_OR_IMM:
		case OP_LOR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
			break;
		case OP_LXOR:
			amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
			break;
		case OP_XOR_IMM:
		case OP_LXOR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
			break;
		case OP_LSHL:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SHL, ins->dreg);
			break;
		case OP_LSHR:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SAR, ins->dreg);
			break;
		case OP_SHR_IMM:
		case OP_LSHR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
			break;
		case OP_SHR_UN_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_LSHR_UN_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
			break;
		case OP_LSHR_UN:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SHR, ins->dreg);
			break;
		case OP_SHL_IMM:
		case OP_LSHL_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
			break;

		case OP_IADDCC:
		case OP_IADD:
			amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IADC:
			amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IADD_IMM:
			amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IADC_IMM:
			amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISUBCC:
		case OP_ISUB:
			amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ISBB:
			amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ISUB_IMM:
			amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISBB_IMM:
			amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IAND:
			amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IAND_IMM:
			amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IOR:
			amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IOR_IMM:
			amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IXOR:
			amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IXOR_IMM:
			amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_INEG:
			amd64_neg_reg_size (code, ins->sreg1, 4);
			break;
		case OP_INOT:
			amd64_not_reg_size (code, ins->sreg1, 4);
			break;
		case OP_ISHL:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
			break;
		case OP_ISHR:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
			break;
		case OP_ISHR_IMM:
			amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISHR_UN_IMM:
			amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISHR_UN:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
			break;
		case OP_ISHL_IMM:
			amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IMUL:
			amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IMUL_OVF:
			amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
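		/*
		 * Unsigned overflow checking needs the one-operand MUL, which always
		 * reads RAX and writes RDX:RAX (setting CF/OF when the high half is
		 * non-zero); hence the RAX/RDX save/restore shuffle below.
		 */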
		case OP_IMUL_OVF_UN:
		case OP_LMUL_OVF_UN: {
			/* the mul operation and the exception check should most likely be split */
			int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
			int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
			/*g_assert (ins->sreg2 == X86_EAX);
			g_assert (ins->dreg == X86_EAX);*/
			if (ins->sreg2 == X86_EAX) {
				non_eax_reg = ins->sreg1;
			} else if (ins->sreg1 == X86_EAX) {
				non_eax_reg = ins->sreg2;
			} else {
				/* no need to save since we're going to store to it anyway */
				if (ins->dreg != X86_EAX) {
					saved_eax = TRUE;
					amd64_push_reg (code, X86_EAX);
				}
				amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
				non_eax_reg = ins->sreg2;
			}
			if (ins->dreg == X86_EDX) {
				if (!saved_eax) {
					saved_eax = TRUE;
					amd64_push_reg (code, X86_EAX);
				}
			} else {
				saved_edx = TRUE;
				amd64_push_reg (code, X86_EDX);
			}
			amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
			/* save before the check since pop and mov don't change the flags */
			if (ins->dreg != X86_EAX)
				amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
			if (saved_edx)
				amd64_pop_reg (code, X86_EDX);
			if (saved_eax)
				amd64_pop_reg (code, X86_EAX);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		}
		case OP_ICOMPARE:
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ICOMPARE_IMM:
			amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IBEQ:
		case OP_IBLT:
		case OP_IBGT:
		case OP_IBGE:
		case OP_IBLE:
		case OP_LBEQ:
		case OP_LBLT:
		case OP_LBGT:
		case OP_LBGE:
		case OP_LBLE:
		case OP_IBNE_UN:
		case OP_IBLT_UN:
		case OP_IBGT_UN:
		case OP_IBGE_UN:
		case OP_IBLE_UN:
		case OP_LBNE_UN:
		case OP_LBLT_UN:
		case OP_LBGT_UN:
		case OP_LBGE_UN:
		case OP_LBLE_UN:
			EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			break;

		case OP_CMOV_IEQ:
		case OP_CMOV_IGE:
		case OP_CMOV_IGT:
		case OP_CMOV_ILE:
		case OP_CMOV_ILT:
		case OP_CMOV_INE_UN:
		case OP_CMOV_IGE_UN:
		case OP_CMOV_IGT_UN:
		case OP_CMOV_ILE_UN:
		case OP_CMOV_ILT_UN:
		case OP_CMOV_LEQ:
		case OP_CMOV_LGE:
		case OP_CMOV_LGT:
		case OP_CMOV_LLE:
		case OP_CMOV_LLT:
		case OP_CMOV_LNE_UN:
		case OP_CMOV_LGE_UN:
		case OP_CMOV_LGT_UN:
		case OP_CMOV_LLE_UN:
		case OP_CMOV_LLT_UN:
			g_assert (ins->dreg == ins->sreg1);
			/* This needs to operate on 64 bit values */
			amd64_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
			break;

		case OP_LNOT:
			amd64_not_reg (code, ins->sreg1);
			break;
		case OP_LNEG:
			amd64_neg_reg (code, ins->sreg1);
			break;

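		/*
		 * Constants whose top 32 bits are zero use the short mov r32, imm32
		 * encoding (which zero-extends to 64 bits) instead of the 10-byte
		 * movabs.
		 */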
		case OP_ICONST:
		case OP_I8CONST:
			if ((((guint64)ins->inst_c0) >> 32) == 0 && !mini_get_debug_options ()->single_imm_size)
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
			else
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
			break;
		case OP_AOTCONST:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
			amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, sizeof(gpointer));
			break;
		case OP_JUMP_TABLE:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
			amd64_mov_reg_imm_size (code, ins->dreg, 0, 8);
			break;
		case OP_MOVE:
			if (ins->dreg != ins->sreg1)
				amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof(mgreg_t));
			break;
		case OP_AMD64_SET_XMMREG_R4: {
			if (cfg->r4fp) {
				if (ins->dreg != ins->sreg1)
					amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
			} else {
				amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
			}
			break;
		}
		case OP_AMD64_SET_XMMREG_R8: {
			if (ins->dreg != ins->sreg1)
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		}
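		/*
		 * Tail call: restore the callee-saved registers, move the outgoing
		 * stack arguments over this frame's incoming argument area, tear
		 * down the frame, then jump to the target through R11.
		 */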
		case OP_TAILCALL: {
			MonoCallInst *call = (MonoCallInst*)ins;
			int i, save_area_offset;

			g_assert (!cfg->method->save_lmf);

			/* Restore callee saved registers */
			save_area_offset = cfg->arch.reg_save_area_offset;
			for (i = 0; i < AMD64_NREG; ++i)
				if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
					amd64_mov_reg_membase (code, i, cfg->frame_reg, save_area_offset, 8);
					save_area_offset += 8;
				}

			if (cfg->arch.omit_fp) {
				if (cfg->arch.stack_alloc_size)
					amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
				// FIXME:
				if (call->stack_usage)
					NOT_IMPLEMENTED;
			} else {
				/* Copy arguments on the stack to our argument area */
				for (i = 0; i < call->stack_usage; i += sizeof(mgreg_t)) {
					amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, i, sizeof(mgreg_t));
					amd64_mov_membase_reg (code, AMD64_RBP, 16 + i, AMD64_RAX, sizeof(mgreg_t));
				}

				amd64_leave (code);
			}

			offset = code - cfg->native_code;
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, call->method);
			if (cfg->compile_aot)
				amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
			else
				amd64_set_reg_template (code, AMD64_R11);
			amd64_jump_reg (code, AMD64_R11);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		}
		case OP_CHECK_THIS:
			/* ensure ins->sreg1 is not NULL */
			amd64_alu_membase_imm_size (code, X86_CMP, ins->sreg1, 0, 0, 4);
			break;
		case OP_ARGLIST: {
			amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie);
			amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, sizeof(gpointer));
			break;
		}
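		/*
		 * For vararg calls the SysV AMD64 ABI expects AL to hold an upper
		 * bound on the number of XMM registers used to pass arguments; the
		 * xor/mov of RAX below maintains that convention.
		 */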
		case OP_CALL:
		case OP_FCALL:
		case OP_RCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VCALL2:
		case OP_VOIDCALL:
			call = (MonoCallInst*)ins;
			/*
			 * The AMD64 ABI forces callers to know about varargs.
			 */
			if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
			else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
				/*
				 * Since the unmanaged calling convention doesn't contain a
				 * 'vararg' entry, we have to treat every pinvoke call as a
				 * potential vararg call.
				 */
				guint32 nregs, i;
				nregs = 0;
				for (i = 0; i < AMD64_XMM_NREG; ++i)
					if (call->used_fregs & (1 << i))
						nregs ++;
				if (!nregs)
					amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
				else
					amd64_mov_reg_imm (code, AMD64_RAX, nregs);
			}

			if (ins->flags & MONO_INST_HAS_METHOD)
				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE);
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_REG:
		case OP_RCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VCALL2_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			call = (MonoCallInst*)ins;

			if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
				amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
				ins->sreg1 = AMD64_R11;
			}

			/*
			 * The AMD64 ABI forces callers to know about varargs.
			 */
			if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
				if (ins->sreg1 == AMD64_RAX) {
					amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
					ins->sreg1 = AMD64_R11;
				}
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
			} else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
				/*
				 * Since the unmanaged calling convention doesn't contain a
				 * 'vararg' entry, we have to treat every pinvoke call as a
				 * potential vararg call.
				 */
				guint32 nregs, i;
				nregs = 0;
				for (i = 0; i < AMD64_XMM_NREG; ++i)
					if (call->used_fregs & (1 << i))
						nregs ++;
				if (ins->sreg1 == AMD64_RAX) {
					amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
					ins->sreg1 = AMD64_R11;
				}
				if (!nregs)
					amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
				else
					amd64_mov_reg_imm (code, AMD64_RAX, nregs);
			}

			amd64_call_reg (code, ins->sreg1);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_RCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VCALL2_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			call = (MonoCallInst*)ins;

			amd64_call_membase (code, ins->sreg1, ins->inst_offset);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			code = emit_move_return_value (cfg, ins, code);
			break;
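		/*
		 * OP_DYN_CALL: R11 points to a DynCallArgs buffer describing the
		 * call, R10 holds the callee. The FP argument registers are only
		 * loaded when DynCallArgs.has_fp is set; the integer and FP results
		 * are stored back into the buffer after the call.
		 */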
		case OP_DYN_CALL: {
			int i;
			MonoInst *var = cfg->dyn_call_var;
			guint8 *label;

			g_assert (var->opcode == OP_REGOFFSET);

			/* r11 = args buffer filled by mono_arch_get_dyn_call_args () */
			amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
			/* r10 = ftn */
			amd64_mov_reg_reg (code, AMD64_R10, ins->sreg2, 8);

			/* Save args buffer */
			amd64_mov_membase_reg (code, var->inst_basereg, var->inst_offset, AMD64_R11, 8);

			/* Set fp arg regs */
			amd64_mov_reg_membase (code, AMD64_RAX, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, has_fp), sizeof (mgreg_t));
			amd64_test_reg_reg (code, AMD64_RAX, AMD64_RAX);
			label = code;
			amd64_branch8 (code, X86_CC_Z, -1, 1);
			for (i = 0; i < FLOAT_PARAM_REGS; ++i)
				amd64_sse_movsd_reg_membase (code, i, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs) + (i * sizeof (double)));
			amd64_patch (label, code);

			/* Set argument registers */
			for (i = 0; i < PARAM_REGS; ++i)
				amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, i * sizeof(mgreg_t), sizeof(mgreg_t));

			/* Make the call */
			amd64_call_reg (code, AMD64_R10);

			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;

			/* Save result */
			amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
			amd64_mov_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, res), AMD64_RAX, 8);
			amd64_sse_movsd_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, fregs), AMD64_XMM0);
			break;
		}
		case OP_AMD64_SAVE_SP_TO_LMF: {
			MonoInst *lmf_var = cfg->lmf_var;
			amd64_mov_membase_reg (code, lmf_var->inst_basereg, lmf_var->inst_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
			break;
		}
		case OP_X86_PUSH:
			g_assert_not_reached ();
			amd64_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			g_assert_not_reached ();
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			g_assert_not_reached ();
			amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: {
			int size = ALIGN_TO (ins->inst_imm, 8);

			g_assert_not_reached ();

			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
			amd64_push_reg (code, AMD64_RDI);
			amd64_push_reg (code, AMD64_RSI);
			amd64_push_reg (code, AMD64_RCX);
			if (ins->inst_offset)
				amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
			else
				amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
			amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8));
			amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3));
			amd64_cld (code);
			amd64_prefix (code, X86_REP_PREFIX);
			amd64_movsd (code);
			amd64_pop_reg (code, AMD64_RCX);
			amd64_pop_reg (code, AMD64_RSI);
			amd64_pop_reg (code, AMD64_RDI);
			break;
		}
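		/*
		 * Inline generic class init check: test the 'initialized' bit in the
		 * MonoVTable and only call mono_generic_class_init on the slow path.
		 */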
		case OP_GENERIC_CLASS_INIT: {
			static int byte_offset = -1;
			static guint8 bitmask;
			guint8 *jump;

			g_assert (ins->sreg1 == MONO_AMD64_ARG_REG1);

			if (byte_offset < 0)
				mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);

			amd64_test_membase_imm_size (code, ins->sreg1, byte_offset, bitmask, 1);
			jump = code;
			amd64_branch8 (code, X86_CC_NZ, -1, 1);

			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_generic_class_init", FALSE);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;

			x86_patch (jump, code);
			break;
		}

		case OP_X86_LEA:
			amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
			break;
		case OP_X86_LEA_MEMBASE:
			amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
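		/*
		 * localloc: round the requested size up to MONO_ARCH_FRAME_ALIGNMENT
		 * with (n + A-1) & ~(A-1); dreg receives the new RSP, biased past
		 * the parameter area when one is in use.
		 */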
		case OP_LOCALLOC:
			/* keep alignment */
			amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
			code = mono_emit_stack_alloc (cfg, code, ins);
			amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
			if (cfg->param_area)
				amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
			break;
		case OP_LOCALLOC_IMM: {
			guint32 size = ins->inst_imm;
			size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);

			if (ins->flags & MONO_INST_INIT) {
				if (size < 64) {
					int i;

					amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
					amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);

					for (i = 0; i < size; i += 8)
						amd64_mov_membase_reg (code, AMD64_RSP, i, ins->dreg, 8);
					amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
				} else {
					amd64_mov_reg_imm (code, ins->dreg, size);
					ins->sreg1 = ins->dreg;

					code = mono_emit_stack_alloc (cfg, code, ins);
					amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
				}
			} else {
				amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
			}
			if (cfg->param_area)
				amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
			break;
		}
		case OP_THROW: {
			amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
					  (gpointer)"mono_arch_throw_exception", FALSE);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		}
		case OP_RETHROW: {
			amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
					  (gpointer)"mono_arch_rethrow_exception", FALSE);
			ins->flags |= MONO_INST_GC_CALLSITE;
			ins->backend.pc_offset = code - cfg->native_code;
			break;
		}
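		/*
		 * OP_CALL_HANDLER calls the finally/filter block like a subroutine;
		 * the extra 8-byte adjustment keeps RSP 16-byte aligned across the
		 * return address pushed by the call instruction.
		 */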
		case OP_CALL_HANDLER:
			/* Align stack */
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			amd64_call_imm (code, 0);
			mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
			/* Restore stack alignment */
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
			break;
		case OP_START_HANDLER: {
			/*
			 * Even though we're saving RSP, use sizeof (gpointer) because
			 * spvar is of type IntPtr (see mono_create_spvar_for_region ()).
			 */
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, sizeof(gpointer));

			if ((MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY) ||
				 MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FILTER)) &&
				cfg->param_area) {
				amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
			}
			break;
		}
		case OP_ENDFINALLY: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof(gpointer));
			amd64_ret (code);
			break;
		}
		case OP_ENDFILTER: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof(gpointer));
			/* The local allocator will put the result into RAX */
			amd64_ret (code);
			break;
		}
		case OP_GET_EX_OBJ:
			if (ins->dreg != AMD64_RAX)
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, sizeof (gpointer));
			break;
		case OP_LABEL:
			ins->inst_c0 = code - cfg->native_code;
			break;
		case OP_BR:
			//g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
			//if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
			//	break;
			if (ins->inst_target_bb->native_offset) {
				amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
			} else {
				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
				if ((cfg->opt & MONO_OPT_BRANCH) &&
				    x86_is_imm8 (ins->inst_target_bb->max_offset - offset))
					x86_jump8 (code, 0);
				else
					x86_jump32 (code, 0);
			}
			break;
4823                 case OP_BR_REG:
4824                         amd64_jump_reg (code, ins->sreg1);
4825                         break;
4826                 case OP_ICNEQ:
4827                 case OP_ICGE:
4828                 case OP_ICLE:
4829                 case OP_ICGE_UN:
4830                 case OP_ICLE_UN:
4831
4832                 case OP_CEQ:
4833                 case OP_LCEQ:
4834                 case OP_ICEQ:
4835                 case OP_CLT:
4836                 case OP_LCLT:
4837                 case OP_ICLT:
4838                 case OP_CGT:
4839                 case OP_ICGT:
4840                 case OP_LCGT:
4841                 case OP_CLT_UN:
4842                 case OP_LCLT_UN:
4843                 case OP_ICLT_UN:
4844                 case OP_CGT_UN:
4845                 case OP_LCGT_UN:
4846                 case OP_ICGT_UN:
4847                         amd64_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
4848                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4849                         break;
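                     /*
                      * The OP_COND_EXC_* opcodes become a conditional forward branch to a
                      * throw sequence emitted after the method body (see
                      * mono_arch_emit_exceptions); ins->inst_p1 names the exception class,
                      * e.g. "OverflowException".
                      */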
4850                 case OP_COND_EXC_EQ:
4851                 case OP_COND_EXC_NE_UN:
4852                 case OP_COND_EXC_LT:
4853                 case OP_COND_EXC_LT_UN:
4854                 case OP_COND_EXC_GT:
4855                 case OP_COND_EXC_GT_UN:
4856                 case OP_COND_EXC_GE:
4857                 case OP_COND_EXC_GE_UN:
4858                 case OP_COND_EXC_LE:
4859                 case OP_COND_EXC_LE_UN:
4860                 case OP_COND_EXC_IEQ:
4861                 case OP_COND_EXC_INE_UN:
4862                 case OP_COND_EXC_ILT:
4863                 case OP_COND_EXC_ILT_UN:
4864                 case OP_COND_EXC_IGT:
4865                 case OP_COND_EXC_IGT_UN:
4866                 case OP_COND_EXC_IGE:
4867                 case OP_COND_EXC_IGE_UN:
4868                 case OP_COND_EXC_ILE:
4869                 case OP_COND_EXC_ILE_UN:
4870                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], (const char *)ins->inst_p1);
4871                         break;
4872                 case OP_COND_EXC_OV:
4873                 case OP_COND_EXC_NO:
4874                 case OP_COND_EXC_C:
4875                 case OP_COND_EXC_NC:
4876                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
4877                                                     (ins->opcode < OP_COND_EXC_NE_UN), (const char *)ins->inst_p1);
4878                         break;
4879                 case OP_COND_EXC_IOV:
4880                 case OP_COND_EXC_INO:
4881                 case OP_COND_EXC_IC:
4882                 case OP_COND_EXC_INC:
4883                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], 
4884                                                     (ins->opcode < OP_COND_EXC_INE_UN), (const char *)ins->inst_p1);
4885                         break;
4886
4887                 /* floating point opcodes */
4888                 case OP_R8CONST: {
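                             /*
                              * +0.0 is materialized with xorpd (no memory access, nothing to
                              * patch); any other constant is loaded RIP-relative from a
                              * literal slot filled in through MONO_PATCH_INFO_R8.
                              */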
4889                         double d = *(double *)ins->inst_p0;
4890
4891                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
4892                                 amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
4893                         }
4894                         else {
4895                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
4896                                 amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
4897                         }
4898                         break;
4899                 }
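                     /*
                      * cfg->r4fp selects how float32 values live in registers: when set
                      * they stay single precision in xmm registers, otherwise they are
                      * widened to double on load/convert and narrowed back on store, which
                      * is why the non-r4fp paths below pair movss/cvtsi2ss with cvtss2sd.
                      */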
4900                 case OP_R4CONST: {
4901                         float f = *(float *)ins->inst_p0;
4902
4903                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
4904                                 if (cfg->r4fp)
4905                                         amd64_sse_xorps_reg_reg (code, ins->dreg, ins->dreg);
4906                                 else
4907                                         amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
4908                         }
4909                         else {
4910                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
4911                                 amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
4912                                 if (!cfg->r4fp)
4913                                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4914                         }
4915                         break;
4916                 }
4917                 case OP_STORER8_MEMBASE_REG:
4918                         amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
4919                         break;
4920                 case OP_LOADR8_MEMBASE:
4921                         amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
4922                         break;
4923                 case OP_STORER4_MEMBASE_REG:
4924                         if (cfg->r4fp) {
4925                                 amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
4926                         } else {
4927                                 /* This requires a double->single conversion */
4928                                 amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
4929                                 amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, MONO_ARCH_FP_SCRATCH_REG);
4930                         }
4931                         break;
4932                 case OP_LOADR4_MEMBASE:
4933                         if (cfg->r4fp) {
4934                                 amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
4935                         } else {
4936                                 amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
4937                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4938                         }
4939                         break;
4940                 case OP_ICONV_TO_R4:
4941                         if (cfg->r4fp) {
4942                                 amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
4943                         } else {
4944                                 amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
4945                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4946                         }
4947                         break;
4948                 case OP_ICONV_TO_R8:
4949                         amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
4950                         break;
4951                 case OP_LCONV_TO_R4:
4952                         if (cfg->r4fp) {
4953                                 amd64_sse_cvtsi2ss_reg_reg (code, ins->dreg, ins->sreg1);
4954                         } else {
4955                                 amd64_sse_cvtsi2ss_reg_reg (code, ins->dreg, ins->sreg1);
4956                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4957                         }
4958                         break;
4959                 case OP_LCONV_TO_R8:
4960                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
4961                         break;
4962                 case OP_FCONV_TO_R4:
4963                         if (cfg->r4fp) {
4964                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
4965                         } else {
4966                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
4967                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
4968                         }
4969                         break;
4970                 case OP_FCONV_TO_I1:
4971                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
4972                         break;
4973                 case OP_FCONV_TO_U1:
4974                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
4975                         break;
4976                 case OP_FCONV_TO_I2:
4977                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
4978                         break;
4979                 case OP_FCONV_TO_U2:
4980                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
4981                         break;
4982                 case OP_FCONV_TO_U4:
4983                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);                  
4984                         break;
4985                 case OP_FCONV_TO_I4:
4986                 case OP_FCONV_TO_I:
4987                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
4988                         break;
4989                 case OP_FCONV_TO_I8:
4990                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
4991                         break;
4992
4993                 case OP_RCONV_TO_I1:
4994                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
4995                         amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4996                         break;
4997                 case OP_RCONV_TO_U1:
4998                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
4999                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
5000                         break;
5001                 case OP_RCONV_TO_I2:
5002                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5003                         amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
5004                         break;
5005                 case OP_RCONV_TO_U2:
5006                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5007                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
5008                         break;
5009                 case OP_RCONV_TO_I4:
5010                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5011                         break;
5012                 case OP_RCONV_TO_U4:
5013                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5014                         break;
5015                 case OP_RCONV_TO_I8:
5016                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
5017                         break;
5018                 case OP_RCONV_TO_R8:
5019                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->sreg1);
5020                         break;
5021                 case OP_RCONV_TO_R4:
5022                         if (ins->dreg != ins->sreg1)
5023                                 amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
5024                         break;
5025
5026                 case OP_LCONV_TO_R_UN: { 
5027                         guint8 *br [2];
5028
5029                         /* Based on gcc code */
5030                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
5031                         br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE);
5032
5033                         /* Positive case */
5034                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
5035                         br [1] = code; x86_jump8 (code, 0);
5036                         amd64_patch (br [0], code);
5037
5038                         /* Negative case */
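                             /*
                              * The top bit is set, so as an unsigned value sreg1 is >= 2^63
                              * and cvtsi2sd alone would be wrong. Standard trick: halve the
                              * value, folding the lost low bit back in as a rounding sticky
                              * bit, convert, then double via addsd. E.g. for 2^64 - 1:
                              * (x >> 1) | (x & 1) = 2^63 - 1, which converts (rounding to
                              * 2^63) and doubles to 2^64, the correctly rounded result.
                              */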
5039                         /* Save to the red zone */
5040                         amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
5041                         amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
5042                         amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8);
5043                         amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
5044                         amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1);
5045                         amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1);
5046                         amd64_alu_reg_reg (code, X86_OR, AMD64_RAX, AMD64_RCX);
5047                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX);
5048                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg);
5049                         /* Restore */
5050                         amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
5051                         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8);
5052                         amd64_patch (br [1], code);
5053                         break;
5054                 }
5055                 case OP_LCONV_TO_OVF_U4:
5056                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
5057                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
5058                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
5059                         break;
5060                 case OP_LCONV_TO_OVF_I4_UN:
5061                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
5062                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
5063                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
5064                         break;
5065                 case OP_FMOVE:
5066                         if (ins->dreg != ins->sreg1)
5067                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
5068                         break;
5069                 case OP_RMOVE:
5070                         if (ins->dreg != ins->sreg1)
5071                                 amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
5072                         break;
5073                 case OP_MOVE_F_TO_I4:
5074                         if (cfg->r4fp) {
5075                                 amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
5076                         } else {
5077                                 amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
5078                                 amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8);
5079                         }
5080                         break;
5081                 case OP_MOVE_I4_TO_F:
5082                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
5083                         if (!cfg->r4fp)
5084                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5085                         break;
5086                 case OP_MOVE_F_TO_I8:
5087                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
5088                         break;
5089                 case OP_MOVE_I8_TO_F:
5090                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
5091                         break;
5092                 case OP_FADD:
5093                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
5094                         break;
5095                 case OP_FSUB:
5096                         amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
5097                         break;          
5098                 case OP_FMUL:
5099                         amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
5100                         break;          
5101                 case OP_FDIV:
5102                         amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
5103                         break;          
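                     /*
                      * OP_FNEG and OP_ABS are pure sign-bit manipulations: xorpd with -0.0
                      * (only the sign bit set) flips the sign, and andpd with the
                      * complementary mask 0x7fffffffffffffff clears it. Both constants are
                      * loaded RIP-relative through the patch machinery.
                      */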
5104                 case OP_FNEG: {
5105                         static double r8_0 = -0.0;
5106
5107                         g_assert (ins->sreg1 == ins->dreg);
5108                                         
5109                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0);
5110                         amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
5111                         break;
5112                 }
5113                 case OP_SIN:
5114                         EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
5115                         break;          
5116                 case OP_COS:
5117                         EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
5118                         break;          
5119                 case OP_ABS: {
5120                         static guint64 d = 0x7fffffffffffffffUL;
5121
5122                         g_assert (ins->sreg1 == ins->dreg);
5123                                         
5124                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &d);
5125                         amd64_sse_andpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
5126                         break;          
5127                 }
5128                 case OP_SQRT:
5129                         EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
5130                         break;
5131
5132                 case OP_RADD:
5133                         amd64_sse_addss_reg_reg (code, ins->dreg, ins->sreg2);
5134                         break;
5135                 case OP_RSUB:
5136                         amd64_sse_subss_reg_reg (code, ins->dreg, ins->sreg2);
5137                         break;
5138                 case OP_RMUL:
5139                         amd64_sse_mulss_reg_reg (code, ins->dreg, ins->sreg2);
5140                         break;
5141                 case OP_RDIV:
5142                         amd64_sse_divss_reg_reg (code, ins->dreg, ins->sreg2);
5143                         break;
5144                 case OP_RNEG: {
5145                         static float r4_0 = -0.0;
5146
5147                         g_assert (ins->sreg1 == ins->dreg);
5148
5149                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, &r4_0);
5150                         amd64_sse_movss_reg_membase (code, MONO_ARCH_FP_SCRATCH_REG, AMD64_RIP, 0);
5151                         amd64_sse_xorps_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
5152                         break;
5153                 }
5154
5155                 case OP_IMIN:
5156                         g_assert (cfg->opt & MONO_OPT_CMOV);
5157                         g_assert (ins->dreg == ins->sreg1);
5158                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5159                         amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4);
5160                         break;
5161                 case OP_IMIN_UN:
5162                         g_assert (cfg->opt & MONO_OPT_CMOV);
5163                         g_assert (ins->dreg == ins->sreg1);
5164                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5165                         amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4);
5166                         break;
5167                 case OP_IMAX:
5168                         g_assert (cfg->opt & MONO_OPT_CMOV);
5169                         g_assert (ins->dreg == ins->sreg1);
5170                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5171                         amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4);
5172                         break;
5173                 case OP_IMAX_UN:
5174                         g_assert (cfg->opt & MONO_OPT_CMOV);
5175                         g_assert (ins->dreg == ins->sreg1);
5176                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5177                         amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4);
5178                         break;
5179                 case OP_LMIN:
5180                         g_assert (cfg->opt & MONO_OPT_CMOV);
5181                         g_assert (ins->dreg == ins->sreg1);
5182                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5183                         amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
5184                         break;
5185                 case OP_LMIN_UN:
5186                         g_assert (cfg->opt & MONO_OPT_CMOV);
5187                         g_assert (ins->dreg == ins->sreg1);
5188                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5189                         amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
5190                         break;
5191                 case OP_LMAX:
5192                         g_assert (cfg->opt & MONO_OPT_CMOV);
5193                         g_assert (ins->dreg == ins->sreg1);
5194                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5195                         amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
5196                         break;
5197                 case OP_LMAX_UN:
5198                         g_assert (cfg->opt & MONO_OPT_CMOV);
5199                         g_assert (ins->dreg == ins->sreg1);
5200                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5201                         amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
5202                         break;  
5203                 case OP_X86_FPOP:
5204                         break;          
5205                 case OP_FCOMPARE:
5206                         /* 
5207                          * The two arguments are swapped because the fbranch instructions
5208                          * depend on this for the non-sse case to work.
5209                          */
5210                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5211                         break;
5212                 case OP_RCOMPARE:
5213                         /*
5214                          * FIXME: Get rid of this.
5215                          * The two arguments are swapped because the fbranch instructions
5216                          * depend on this for the non-sse case to work.
5217                          */
5218                         amd64_sse_comiss_reg_reg (code, ins->sreg2, ins->sreg1);
5219                         break;
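                     /*
                      * comisd/comiss set ZF/PF/CF as in the table at the top of this file;
                      * PF=1 signals an unordered (NaN) operand. Each compare below branches
                      * on X86_CC_P first so NaN produces the required result: 0 for the
                      * ordered compares, 1 for the _UN variants (and for FCNEQ).
                      */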
5220                 case OP_FCNEQ:
5221                 case OP_FCEQ: {
5222                         /* zeroing the register at the start results in 
5223                          * shorter and faster code (we can also remove the widening op)
5224                          */
5225                         guchar *unordered_check;
5226
5227                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5228                         amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
5229                         unordered_check = code;
5230                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5231
5232                         if (ins->opcode == OP_FCEQ) {
5233                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
5234                                 amd64_patch (unordered_check, code);
5235                         } else {
5236                                 guchar *jump_to_end;
5237                                 amd64_set_reg (code, X86_CC_NE, ins->dreg, FALSE);
5238                                 jump_to_end = code;
5239                                 x86_jump8 (code, 0);
5240                                 amd64_patch (unordered_check, code);
5241                                 amd64_inc_reg (code, ins->dreg);
5242                                 amd64_patch (jump_to_end, code);
5243                         }
5244                         break;
5245                 }
5246                 case OP_FCLT:
5247                 case OP_FCLT_UN: {
5248                         /* zeroing the register at the start results in 
5249                          * shorter and faster code (we can also remove the widening op)
5250                          */
5251                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5252                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5253                         if (ins->opcode == OP_FCLT_UN) {
5254                                 guchar *unordered_check = code;
5255                                 guchar *jump_to_end;
5256                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5257                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
5258                                 jump_to_end = code;
5259                                 x86_jump8 (code, 0);
5260                                 amd64_patch (unordered_check, code);
5261                                 amd64_inc_reg (code, ins->dreg);
5262                                 amd64_patch (jump_to_end, code);
5263                         } else {
5264                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
5265                         }
5266                         break;
5267                 }
5268                 case OP_FCLE: {
5269                         guchar *unordered_check;
5270                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5271                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5272                         unordered_check = code;
5273                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5274                         amd64_set_reg (code, X86_CC_NB, ins->dreg, FALSE);
5275                         amd64_patch (unordered_check, code);
5276                         break;
5277                 }
5278                 case OP_FCGT:
5279                 case OP_FCGT_UN: {
5280                         /* zeroing the register at the start results in 
5281                          * shorter and faster code (we can also remove the widening op)
5282                          */
5283                         guchar *unordered_check;
5284
5285                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5286                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5287                         if (ins->opcode == OP_FCGT) {
5288                                 unordered_check = code;
5289                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5290                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
5291                                 amd64_patch (unordered_check, code);
5292                         } else {
5293                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
5294                         }
5295                         break;
5296                 }
5297                 case OP_FCGE: {
5298                         guchar *unordered_check;
5299                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5300                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5301                         unordered_check = code;
5302                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5303                         amd64_set_reg (code, X86_CC_NA, ins->dreg, FALSE);
5304                         amd64_patch (unordered_check, code);
5305                         break;
5306                 }
5307
5308                 case OP_RCEQ:
5309                 case OP_RCGT:
5310                 case OP_RCLT:
5311                 case OP_RCLT_UN:
5312                 case OP_RCGT_UN: {
5313                         int x86_cond;
5314                         gboolean unordered = FALSE;
5315
5316                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5317                         amd64_sse_comiss_reg_reg (code, ins->sreg2, ins->sreg1);
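                             /*
                              * As with OP_RCOMPARE the operands are swapped in the comiss,
                              * so the condition codes are mirrored: OP_RCGT tests X86_CC_LT
                              * and OP_RCLT tests X86_CC_GT.
                              */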
5318
5319                         switch (ins->opcode) {
5320                         case OP_RCEQ:
5321                                 x86_cond = X86_CC_EQ;
5322                                 break;
5323                         case OP_RCGT:
5324                                 x86_cond = X86_CC_LT;
5325                                 break;
5326                         case OP_RCLT:
5327                                 x86_cond = X86_CC_GT;
5328                                 break;
5329                         case OP_RCLT_UN:
5330                                 x86_cond = X86_CC_GT;
5331                                 unordered = TRUE;
5332                                 break;
5333                         case OP_RCGT_UN:
5334                                 x86_cond = X86_CC_LT;
5335                                 unordered = TRUE;
5336                                 break;
5337                         default:
5338                                 g_assert_not_reached ();
5339                                 break;
5340                         }
5341
5342                         if (unordered) {
5343                                 guchar *unordered_check;
5344                                 guchar *jump_to_end;
5345
5346                                 unordered_check = code;
5347                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5348                                 amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
5349                                 jump_to_end = code;
5350                                 x86_jump8 (code, 0);
5351                                 amd64_patch (unordered_check, code);
5352                                 amd64_inc_reg (code, ins->dreg);
5353                                 amd64_patch (jump_to_end, code);
5354                         } else {
5355                                 amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
5356                         }
5357                         break;
5358                 }
5359                 case OP_FCLT_MEMBASE:
5360                 case OP_FCGT_MEMBASE:
5361                 case OP_FCLT_UN_MEMBASE:
5362                 case OP_FCGT_UN_MEMBASE:
5363                 case OP_FCEQ_MEMBASE: {
5364                         guchar *unordered_check, *jump_to_end;
5365                         int x86_cond;
5366
5367                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5368                         amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
5369
5370                         switch (ins->opcode) {
5371                         case OP_FCEQ_MEMBASE:
5372                                 x86_cond = X86_CC_EQ;
5373                                 break;
5374                         case OP_FCLT_MEMBASE:
5375                         case OP_FCLT_UN_MEMBASE:
5376                                 x86_cond = X86_CC_LT;
5377                                 break;
5378                         case OP_FCGT_MEMBASE:
5379                         case OP_FCGT_UN_MEMBASE:
5380                                 x86_cond = X86_CC_GT;
5381                                 break;
5382                         default:
5383                                 g_assert_not_reached ();
5384                         }
5385
5386                         unordered_check = code;
5387                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5388                         amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
5389
5390                         switch (ins->opcode) {
5391                         case OP_FCEQ_MEMBASE:
5392                         case OP_FCLT_MEMBASE:
5393                         case OP_FCGT_MEMBASE:
5394                                 amd64_patch (unordered_check, code);
5395                                 break;
5396                         case OP_FCLT_UN_MEMBASE:
5397                         case OP_FCGT_UN_MEMBASE:
5398                                 jump_to_end = code;
5399                                 x86_jump8 (code, 0);
5400                                 amd64_patch (unordered_check, code);
5401                                 amd64_inc_reg (code, ins->dreg);
5402                                 amd64_patch (jump_to_end, code);
5403                                 break;
5404                         default:
5405                                 break;
5406                         }
5407                         break;
5408                 }
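                     /*
                      * The C0/C1/C3 wording below is inherited from the x87 fbranch code;
                      * with SSE the same information arrives as CF/ZF/PF from the swapped
                      * comisd emitted by OP_FCOMPARE, which is why the "greater" branches
                      * test the less-than/below family of conditions.
                      */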
5409                 case OP_FBEQ: {
5410                         guchar *jump = code;
5411                         x86_branch8 (code, X86_CC_P, 0, TRUE);
5412                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
5413                         amd64_patch (jump, code);
5414                         break;
5415                 }
5416                 case OP_FBNE_UN:
5417                         /* Branch if C013 != 100 */
5418                         /* branch if !ZF or (PF|CF) */
5419                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
5420                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
5421                         EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
5422                         break;
5423                 case OP_FBLT:
5424                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
5425                         break;
5426                 case OP_FBLT_UN:
5427                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
5428                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
5429                         break;
5430                 case OP_FBGT:
5431                 case OP_FBGT_UN:
5432                         if (ins->opcode == OP_FBGT) {
5433                                 guchar *br1;
5434
5435                                 /* skip branch if C1=1 */
5436                                 br1 = code;
5437                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5438                                 /* branch if (C0 | C3) = 1 */
5439                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
5440                                 amd64_patch (br1, code);
5441                                 break;
5442                         } else {
5443                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
5444                         }
5445                         break;
5446                 case OP_FBGE: {
5447                         /* Branch if C013 == 100 or 001 */
5448                         guchar *br1;
5449
5450                         /* skip branch if C1=1 */
5451                         br1 = code;
5452                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5453                         /* branch if (C0 | C3) = 1 */
5454                         EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
5455                         amd64_patch (br1, code);
5456                         break;
5457                 }
5458                 case OP_FBGE_UN:
5459                         /* Branch if C013 == 000 */
5460                         EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
5461                         break;
5462                 case OP_FBLE: {
5463                         /* Branch if C013=000 or 100 */
5464                         guchar *br1;
5465
5466                         /* skip branch if C1=1 */
5467                         br1 = code;
5468                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5469                         /* branch if C0=0 */
5470                         EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
5471                         amd64_patch (br1, code);
5472                         break;
5473                 }
5474                 case OP_FBLE_UN:
5475                         /* Branch if C013 != 001 */
5476                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
5477                         EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
5478                         break;
5479                 case OP_CKFINITE:
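                             /*
                              * The value is bounced through the x87 stack so fxam can
                              * classify it. After fnstsw, masking C3|C0 (0x4100) and
                              * comparing with C0 alone matches exactly NaN (C0=1) and
                              * +/-infinity (C2=1, C0=1), both with C3=0, which are the
                              * inputs CKFINITE must reject with OverflowException.
                              */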
5480                         /* Transfer value to the fp stack */
5481                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
5482                         amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
5483                         amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
5484
5485                         amd64_push_reg (code, AMD64_RAX);
5486                         amd64_fxam (code);
5487                         amd64_fnstsw (code);
5488                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
5489                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
5490                         amd64_pop_reg (code, AMD64_RAX);
5491                         amd64_fstp (code, 0);
5492                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "OverflowException");
5493                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
5494                         break;
5495                 case OP_TLS_GET: {
5496                         code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
5497                         break;
5498                 }
5499                 case OP_TLS_GET_REG:
5500                         code = emit_tls_get_reg (code, ins->dreg, ins->sreg1);
5501                         break;
5502                 case OP_TLS_SET: {
5503                         code = amd64_emit_tls_set (code, ins->sreg1, ins->inst_offset);
5504                         break;
5505                 }
5506                 case OP_TLS_SET_REG: {
5507                         code = amd64_emit_tls_set_reg (code, ins->sreg1, ins->sreg2);
5508                         break;
5509                 }
5510                 case OP_MEMORY_BARRIER: {
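                             /*
                              * amd64's TSO memory model already gives plain loads acquire
                              * and plain stores release semantics, so only a fully
                              * sequentially consistent barrier needs an actual mfence;
                              * weaker kinds compile to nothing.
                              */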
5511                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5512                                 x86_mfence (code);
5513                         break;
5514                 }
5515                 case OP_ATOMIC_ADD_I4:
5516                 case OP_ATOMIC_ADD_I8: {
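                             /*
                              * lock xadd leaves the *old* value in dreg, but OP_ATOMIC_ADD
                              * must return the new one, hence the extra add of sreg2 below.
                              * If dreg aliases sreg2 or the base register, the result is
                              * built in R11 first and moved into place at the end.
                              */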
5517                         int dreg = ins->dreg;
5518                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;
5519
5520                         if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
5521                                 dreg = AMD64_R11;
5522
5523                         amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
5524                         amd64_prefix (code, X86_LOCK_PREFIX);
5525                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
5526                         /* dreg contains the old value, add with sreg2 value */
5527                         amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);
5528                         
5529                         if (ins->dreg != dreg)
5530                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
5531
5532                         break;
5533                 }
5534                 case OP_ATOMIC_EXCHANGE_I4:
5535                 case OP_ATOMIC_EXCHANGE_I8: {
5536                         guint32 size = ins->opcode == OP_ATOMIC_EXCHANGE_I4 ? 4 : 8;
5537
5538                         /* LOCK prefix is implied. */
5539                         amd64_mov_reg_reg (code, GP_SCRATCH_REG, ins->sreg2, size);
5540                         amd64_xchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, GP_SCRATCH_REG, size);
5541                         amd64_mov_reg_reg (code, ins->dreg, GP_SCRATCH_REG, size);
5542                         break;
5543                 }
5544                 case OP_ATOMIC_CAS_I4:
5545                 case OP_ATOMIC_CAS_I8: {
5546                         guint32 size;
5547
5548                         if (ins->opcode == OP_ATOMIC_CAS_I8)
5549                                 size = 8;
5550                         else
5551                                 size = 4;
5552
5553                         /* 
5554                          * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
5555                          * an explanation of how this works.
5556                          */
5557                         g_assert (ins->sreg3 == AMD64_RAX);
5558                         g_assert (ins->sreg1 != AMD64_RAX);
5559                         g_assert (ins->sreg1 != ins->sreg2);
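                             /*
                              * lock cmpxchg compares RAX (sreg3, the expected value) with
                              * the memory operand: on a match it stores sreg2, otherwise it
                              * loads the observed value into RAX. Either way RAX ends up
                              * with the value that was in memory, which is what the opcode
                              * returns.
                              */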
5560
5561                         amd64_prefix (code, X86_LOCK_PREFIX);
5562                         amd64_cmpxchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);
5563
5564                         if (ins->dreg != AMD64_RAX)
5565                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
5566                         break;
5567                 }
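                     /*
                      * Aligned loads are atomic on amd64 and TSO supplies the acquire
                      * ordering, so the OP_ATOMIC_LOAD_* variants lower to the plain
                      * mov/widen forms; only the sign/zero extension differs.
                      */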
5568                 case OP_ATOMIC_LOAD_I1: {
5569                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
5570                         break;
5571                 }
5572                 case OP_ATOMIC_LOAD_U1: {
5573                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
5574                         break;
5575                 }
5576                 case OP_ATOMIC_LOAD_I2: {
5577                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
5578                         break;
5579                 }
5580                 case OP_ATOMIC_LOAD_U2: {
5581                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
5582                         break;
5583                 }
5584                 case OP_ATOMIC_LOAD_I4: {
5585                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5586                         break;
5587                 }
5588                 case OP_ATOMIC_LOAD_U4:
5589                 case OP_ATOMIC_LOAD_I8:
5590                 case OP_ATOMIC_LOAD_U8: {
5591                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, ins->opcode == OP_ATOMIC_LOAD_U4 ? 4 : 8);
5592                         break;
5593                 }
5594                 case OP_ATOMIC_LOAD_R4: {
5595                         amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5596                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5597                         break;
5598                 }
5599                 case OP_ATOMIC_LOAD_R8: {
5600                         amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5601                         break;
5602                 }
5603                 case OP_ATOMIC_STORE_I1:
5604                 case OP_ATOMIC_STORE_U1:
5605                 case OP_ATOMIC_STORE_I2:
5606                 case OP_ATOMIC_STORE_U2:
5607                 case OP_ATOMIC_STORE_I4:
5608                 case OP_ATOMIC_STORE_U4:
5609                 case OP_ATOMIC_STORE_I8:
5610                 case OP_ATOMIC_STORE_U8: {
5611                         int size;
5612
5613                         switch (ins->opcode) {
5614                         case OP_ATOMIC_STORE_I1:
5615                         case OP_ATOMIC_STORE_U1:
5616                                 size = 1;
5617                                 break;
5618                         case OP_ATOMIC_STORE_I2:
5619                         case OP_ATOMIC_STORE_U2:
5620                                 size = 2;
5621                                 break;
5622                         case OP_ATOMIC_STORE_I4:
5623                         case OP_ATOMIC_STORE_U4:
5624                                 size = 4;
5625                                 break;
5626                         case OP_ATOMIC_STORE_I8:
5627                         case OP_ATOMIC_STORE_U8:
5628                                 size = 8;
5629                                 break;
5630                         default:
                                     g_assert_not_reached ();
                             }
5631
5632                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, size);
5633
5634                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5635                                 x86_mfence (code);
5636                         break;
5637                 }
5638                 case OP_ATOMIC_STORE_R4: {
5639                         amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
5640                         amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, MONO_ARCH_FP_SCRATCH_REG);
5641
5642                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5643                                 x86_mfence (code);
5644                         break;
5645                 }
5646                 case OP_ATOMIC_STORE_R8: {
5647                         x86_nop (code);
5648                         x86_nop (code);
5649                         amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
5650                         x86_nop (code);
5651                         x86_nop (code);
5652
5653                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5654                                 x86_mfence (code);
5655                         break;
5656                 }
5657                 case OP_CARD_TABLE_WBARRIER: {
5658                         int ptr = ins->sreg1;
5659                         int value = ins->sreg2;
5660                         guchar *br = 0;
5661                         int nursery_shift, card_table_shift;
5662                         gpointer card_table_mask;
5663                         size_t nursery_size;
5664
5665                         gpointer card_table = mono_gc_get_card_table (&card_table_shift, &card_table_mask);
5666                         guint64 nursery_start = (guint64)mono_gc_get_nursery (&nursery_shift, &nursery_size);
5667                         guint64 shifted_nursery_start = nursery_start >> nursery_shift;
5668
5669                         /* If either points to the stack we can simply avoid the WB. This happens due to
5670                          * optimizations revealing a stack store that was not visible when op_cardtable was emitted.
5671                          */
5672                         if (ins->sreg1 == AMD64_RSP || ins->sreg2 == AMD64_RSP)
5673                                 continue;
5674
5675                         /*
5676                          * We need one register we can clobber, we choose EDX and make sreg1
5677                          * fixed EAX to work around limitations in the local register allocator.
5678                          * sreg2 might get allocated to EDX, but that is not a problem since
5679                          * we use it before clobbering EDX.
5680                          */
5681                         g_assert (ins->sreg1 == AMD64_RAX);
5682
5683                         /*
5684                          * This is the code we produce:
5685                          *
5686                          *   edx = value
5687                          *   edx >>= nursery_shift
5688                          *   cmp edx, (nursery_start >> nursery_shift)
5689                          *   jne done
5690                          *   edx = ptr
5691                          *   edx >>= card_table_shift
5692                          *   edx += cardtable
5693                          *   [edx] = 1
5694                          * done:
5695                          */
5696
5697                         if (mono_gc_card_table_nursery_check ()) {
5698                                 if (value != AMD64_RDX)
5699                                         amd64_mov_reg_reg (code, AMD64_RDX, value, 8);
5700                                 amd64_shift_reg_imm (code, X86_SHR, AMD64_RDX, nursery_shift);
5701                                 if (shifted_nursery_start >> 31) {
5702                                         /*
5703                                          * The value we need to compare against is 64 bits, so we need
5704                                          * another spare register.  We use RBX, which we save and
5705                                          * restore.
5706                                          */
5707                                         amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RBX, 8);
5708                                         amd64_mov_reg_imm (code, AMD64_RBX, shifted_nursery_start);
5709                                         amd64_alu_reg_reg (code, X86_CMP, AMD64_RDX, AMD64_RBX);
5710                                         amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RSP, -8, 8);
5711                                 } else {
5712                                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RDX, shifted_nursery_start);
5713                                 }
5714                                 br = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
5715                         }
5716                         amd64_mov_reg_reg (code, AMD64_RDX, ptr, 8);
5717                         amd64_shift_reg_imm (code, X86_SHR, AMD64_RDX, card_table_shift);
5718                         if (card_table_mask)
5719                                 amd64_alu_reg_imm (code, X86_AND, AMD64_RDX, (guint32)(guint64)card_table_mask);
5720
5721                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GC_CARD_TABLE_ADDR, card_table);
5722                         amd64_alu_reg_membase (code, X86_ADD, AMD64_RDX, AMD64_RIP, 0);
5723
5724                         amd64_mov_membase_imm (code, AMD64_RDX, 0, 1, 1);
5725
5726                         if (mono_gc_card_table_nursery_check ())
5727                                 x86_patch (br, code);
5728                         break;
5729                 }
5730 #ifdef MONO_ARCH_SIMD_INTRINSICS
5731                 /* TODO: Some of these IR opcodes are marked as no-clobber even though they do clobber their inputs. */
5732                 case OP_ADDPS:
5733                         amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
5734                         break;
5735                 case OP_DIVPS:
5736                         amd64_sse_divps_reg_reg (code, ins->sreg1, ins->sreg2);
5737                         break;
5738                 case OP_MULPS:
5739                         amd64_sse_mulps_reg_reg (code, ins->sreg1, ins->sreg2);
5740                         break;
5741                 case OP_SUBPS:
5742                         amd64_sse_subps_reg_reg (code, ins->sreg1, ins->sreg2);
5743                         break;
5744                 case OP_MAXPS:
5745                         amd64_sse_maxps_reg_reg (code, ins->sreg1, ins->sreg2);
5746                         break;
5747                 case OP_MINPS:
5748                         amd64_sse_minps_reg_reg (code, ins->sreg1, ins->sreg2);
5749                         break;
5750                 case OP_COMPPS:
5751                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
5752                         amd64_sse_cmpps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5753                         break;
5754                 case OP_ANDPS:
5755                         amd64_sse_andps_reg_reg (code, ins->sreg1, ins->sreg2);
5756                         break;
5757                 case OP_ANDNPS:
5758                         amd64_sse_andnps_reg_reg (code, ins->sreg1, ins->sreg2);
5759                         break;
5760                 case OP_ORPS:
5761                         amd64_sse_orps_reg_reg (code, ins->sreg1, ins->sreg2);
5762                         break;
5763                 case OP_XORPS:
5764                         amd64_sse_xorps_reg_reg (code, ins->sreg1, ins->sreg2);
5765                         break;
5766                 case OP_SQRTPS:
5767                         amd64_sse_sqrtps_reg_reg (code, ins->dreg, ins->sreg1);
5768                         break;
5769                 case OP_RSQRTPS:
5770                         amd64_sse_rsqrtps_reg_reg (code, ins->dreg, ins->sreg1);
5771                         break;
5772                 case OP_RCPPS:
5773                         amd64_sse_rcpps_reg_reg (code, ins->dreg, ins->sreg1);
5774                         break;
5775                 case OP_ADDSUBPS:
5776                         amd64_sse_addsubps_reg_reg (code, ins->sreg1, ins->sreg2);
5777                         break;
5778                 case OP_HADDPS:
5779                         amd64_sse_haddps_reg_reg (code, ins->sreg1, ins->sreg2);
5780                         break;
5781                 case OP_HSUBPS:
5782                         amd64_sse_hsubps_reg_reg (code, ins->sreg1, ins->sreg2);
5783                         break;
5784                 case OP_DUPPS_HIGH:
5785                         amd64_sse_movshdup_reg_reg (code, ins->dreg, ins->sreg1);
5786                         break;
5787                 case OP_DUPPS_LOW:
5788                         amd64_sse_movsldup_reg_reg (code, ins->dreg, ins->sreg1);
5789                         break;
5790
5791                 case OP_PSHUFLEW_HIGH:
5792                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5793                         amd64_sse_pshufhw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5794                         break;
5795                 case OP_PSHUFLEW_LOW:
5796                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5797                         amd64_sse_pshuflw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5798                         break;
5799                 case OP_PSHUFLED:
5800                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5801                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
5802                         break;
5803                 case OP_SHUFPS:
5804                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
5805                         amd64_sse_shufps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5806                         break;
5807                 case OP_SHUFPD:
5808                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3);
5809                         amd64_sse_shufpd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5810                         break;
5811
5812                 case OP_ADDPD:
5813                         amd64_sse_addpd_reg_reg (code, ins->sreg1, ins->sreg2);
5814                         break;
5815                 case OP_DIVPD:
5816                         amd64_sse_divpd_reg_reg (code, ins->sreg1, ins->sreg2);
5817                         break;
5818                 case OP_MULPD:
5819                         amd64_sse_mulpd_reg_reg (code, ins->sreg1, ins->sreg2);
5820                         break;
5821                 case OP_SUBPD:
5822                         amd64_sse_subpd_reg_reg (code, ins->sreg1, ins->sreg2);
5823                         break;
5824                 case OP_MAXPD:
5825                         amd64_sse_maxpd_reg_reg (code, ins->sreg1, ins->sreg2);
5826                         break;
5827                 case OP_MINPD:
5828                         amd64_sse_minpd_reg_reg (code, ins->sreg1, ins->sreg2);
5829                         break;
5830                 case OP_COMPPD:
5831                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
5832                         amd64_sse_cmppd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5833                         break;
5834                 case OP_ANDPD:
5835                         amd64_sse_andpd_reg_reg (code, ins->sreg1, ins->sreg2);
5836                         break;
5837                 case OP_ANDNPD:
5838                         amd64_sse_andnpd_reg_reg (code, ins->sreg1, ins->sreg2);
5839                         break;
5840                 case OP_ORPD:
5841                         amd64_sse_orpd_reg_reg (code, ins->sreg1, ins->sreg2);
5842                         break;
5843                 case OP_XORPD:
5844                         amd64_sse_xorpd_reg_reg (code, ins->sreg1, ins->sreg2);
5845                         break;
5846                 case OP_SQRTPD:
5847                         amd64_sse_sqrtpd_reg_reg (code, ins->dreg, ins->sreg1);
5848                         break;
5849                 case OP_ADDSUBPD:
5850                         amd64_sse_addsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
5851                         break;
5852                 case OP_HADDPD:
5853                         amd64_sse_haddpd_reg_reg (code, ins->sreg1, ins->sreg2);
5854                         break;
5855                 case OP_HSUBPD:
5856                         amd64_sse_hsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
5857                         break;
5858                 case OP_DUPPD:
5859                         amd64_sse_movddup_reg_reg (code, ins->dreg, ins->sreg1);
5860                         break;
5861
5862                 case OP_EXTRACT_MASK:
5863                         amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
5864                         break;
5865
5866                 case OP_PAND:
5867                         amd64_sse_pand_reg_reg (code, ins->sreg1, ins->sreg2);
5868                         break;
5869                 case OP_POR:
5870                         amd64_sse_por_reg_reg (code, ins->sreg1, ins->sreg2);
5871                         break;
5872                 case OP_PXOR:
5873                         amd64_sse_pxor_reg_reg (code, ins->sreg1, ins->sreg2);
5874                         break;
5875
5876                 case OP_PADDB:
5877                         amd64_sse_paddb_reg_reg (code, ins->sreg1, ins->sreg2);
5878                         break;
5879                 case OP_PADDW:
5880                         amd64_sse_paddw_reg_reg (code, ins->sreg1, ins->sreg2);
5881                         break;
5882                 case OP_PADDD:
5883                         amd64_sse_paddd_reg_reg (code, ins->sreg1, ins->sreg2);
5884                         break;
5885                 case OP_PADDQ:
5886                         amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
5887                         break;
5888
5889                 case OP_PSUBB:
5890                         amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
5891                         break;
5892                 case OP_PSUBW:
5893                         amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
5894                         break;
5895                 case OP_PSUBD:
5896                         amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
5897                         break;
5898                 case OP_PSUBQ:
5899                         amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
5900                         break;
5901
5902                 case OP_PMAXB_UN:
5903                         amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
5904                         break;
5905                 case OP_PMAXW_UN:
5906                         amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
5907                         break;
5908                 case OP_PMAXD_UN:
5909                         amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
5910                         break;
5911                 
5912                 case OP_PMAXB:
5913                         amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
5914                         break;
5915                 case OP_PMAXW:
5916                         amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
5917                         break;
5918                 case OP_PMAXD:
5919                         amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
5920                         break;
5921
5922                 case OP_PAVGB_UN:
5923                         amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
5924                         break;
5925                 case OP_PAVGW_UN:
5926                         amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
5927                         break;
5928
5929                 case OP_PMINB_UN:
5930                         amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
5931                         break;
5932                 case OP_PMINW_UN:
5933                         amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
5934                         break;
5935                 case OP_PMIND_UN:
5936                         amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
5937                         break;
5938
5939                 case OP_PMINB:
5940                         amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
5941                         break;
5942                 case OP_PMINW:
5943                         amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
5944                         break;
5945                 case OP_PMIND:
5946                         amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
5947                         break;
5948
5949                 case OP_PCMPEQB:
5950                         amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
5951                         break;
5952                 case OP_PCMPEQW:
5953                         amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
5954                         break;
5955                 case OP_PCMPEQD:
5956                         amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
5957                         break;
5958                 case OP_PCMPEQQ:
5959                         amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
5960                         break;
5961
5962                 case OP_PCMPGTB:
5963                         amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
5964                         break;
5965                 case OP_PCMPGTW:
5966                         amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
5967                         break;
5968                 case OP_PCMPGTD:
5969                         amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
5970                         break;
5971                 case OP_PCMPGTQ:
5972                         amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
5973                         break;
5974
5975                 case OP_PSUM_ABS_DIFF:
5976                         amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
5977                         break;
5978
5979                 case OP_UNPACK_LOWB:
5980                         amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
5981                         break;
5982                 case OP_UNPACK_LOWW:
5983                         amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
5984                         break;
5985                 case OP_UNPACK_LOWD:
5986                         amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
5987                         break;
5988                 case OP_UNPACK_LOWQ:
5989                         amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
5990                         break;
5991                 case OP_UNPACK_LOWPS:
5992                         amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
5993                         break;
5994                 case OP_UNPACK_LOWPD:
5995                         amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
5996                         break;
5997
5998                 case OP_UNPACK_HIGHB:
5999                         amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
6000                         break;
6001                 case OP_UNPACK_HIGHW:
6002                         amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
6003                         break;
6004                 case OP_UNPACK_HIGHD:
6005                         amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
6006                         break;
6007                 case OP_UNPACK_HIGHQ:
6008                         amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
6009                         break;
6010                 case OP_UNPACK_HIGHPS:
6011                         amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
6012                         break;
6013                 case OP_UNPACK_HIGHPD:
6014                         amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
6015                         break;
6016
6017                 case OP_PACKW:
6018                         amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
6019                         break;
6020                 case OP_PACKD:
6021                         amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
6022                         break;
6023                 case OP_PACKW_UN:
6024                         amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
6025                         break;
6026                 case OP_PACKD_UN:
6027                         amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
6028                         break;
6029
6030                 case OP_PADDB_SAT_UN:
6031                         amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
6032                         break;
6033                 case OP_PSUBB_SAT_UN:
6034                         amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
6035                         break;
6036                 case OP_PADDW_SAT_UN:
6037                         amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
6038                         break;
6039                 case OP_PSUBW_SAT_UN:
6040                         amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
6041                         break;
6042
6043                 case OP_PADDB_SAT:
6044                         amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
6045                         break;
6046                 case OP_PSUBB_SAT:
6047                         amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
6048                         break;
6049                 case OP_PADDW_SAT:
6050                         amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
6051                         break;
6052                 case OP_PSUBW_SAT:
6053                         amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
6054                         break;
6055                         
6056                 case OP_PMULW:
6057                         amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
6058                         break;
6059                 case OP_PMULD:
6060                         amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
6061                         break;
6062                 case OP_PMULQ:
6063                         amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
6064                         break;
6065                 case OP_PMULW_HIGH_UN:
6066                         amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
6067                         break;
6068                 case OP_PMULW_HIGH:
6069                         amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
6070                         break;
6071
6072                 case OP_PSHRW:
6073                         amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
6074                         break;
6075                 case OP_PSHRW_REG:
6076                         amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
6077                         break;
6078
6079                 case OP_PSARW:
6080                         amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
6081                         break;
6082                 case OP_PSARW_REG:
6083                         amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
6084                         break;
6085
6086                 case OP_PSHLW:
6087                         amd64_sse_psllw_reg_imm (code, ins->dreg, ins->inst_imm);
6088                         break;
6089                 case OP_PSHLW_REG:
6090                         amd64_sse_psllw_reg_reg (code, ins->dreg, ins->sreg2);
6091                         break;
6092
6093                 case OP_PSHRD:
6094                         amd64_sse_psrld_reg_imm (code, ins->dreg, ins->inst_imm);
6095                         break;
6096                 case OP_PSHRD_REG:
6097                         amd64_sse_psrld_reg_reg (code, ins->dreg, ins->sreg2);
6098                         break;
6099
6100                 case OP_PSARD:
6101                         amd64_sse_psrad_reg_imm (code, ins->dreg, ins->inst_imm);
6102                         break;
6103                 case OP_PSARD_REG:
6104                         amd64_sse_psrad_reg_reg (code, ins->dreg, ins->sreg2);
6105                         break;
6106
6107                 case OP_PSHLD:
6108                         amd64_sse_pslld_reg_imm (code, ins->dreg, ins->inst_imm);
6109                         break;
6110                 case OP_PSHLD_REG:
6111                         amd64_sse_pslld_reg_reg (code, ins->dreg, ins->sreg2);
6112                         break;
6113
6114                 case OP_PSHRQ:
6115                         amd64_sse_psrlq_reg_imm (code, ins->dreg, ins->inst_imm);
6116                         break;
6117                 case OP_PSHRQ_REG:
6118                         amd64_sse_psrlq_reg_reg (code, ins->dreg, ins->sreg2);
6119                         break;
6120                 
6121                 /*TODO: SSE provides no packed arithmetic 64 bit right shift (there is no psraq), so these stay unimplemented:
6122                 case OP_PSARQ:
6123                         amd64_sse_psraq_reg_imm (code, ins->dreg, ins->inst_imm);
6124                         break;
6125                 case OP_PSARQ_REG:
6126                         amd64_sse_psraq_reg_reg (code, ins->dreg, ins->sreg2);
6127                         break;  
6128                 */
6129         
6130                 case OP_PSHLQ:
6131                         amd64_sse_psllq_reg_imm (code, ins->dreg, ins->inst_imm);
6132                         break;
6133                 case OP_PSHLQ_REG:
6134                         amd64_sse_psllq_reg_reg (code, ins->dreg, ins->sreg2);
6135                         break;  
6136                 case OP_CVTDQ2PD:
6137                         amd64_sse_cvtdq2pd_reg_reg (code, ins->dreg, ins->sreg1);
6138                         break;
6139                 case OP_CVTDQ2PS:
6140                         amd64_sse_cvtdq2ps_reg_reg (code, ins->dreg, ins->sreg1);
6141                         break;
6142                 case OP_CVTPD2DQ:
6143                         amd64_sse_cvtpd2dq_reg_reg (code, ins->dreg, ins->sreg1);
6144                         break;
6145                 case OP_CVTPD2PS:
6146                         amd64_sse_cvtpd2ps_reg_reg (code, ins->dreg, ins->sreg1);
6147                         break;
6148                 case OP_CVTPS2DQ:
6149                         amd64_sse_cvtps2dq_reg_reg (code, ins->dreg, ins->sreg1);
6150                         break;
6151                 case OP_CVTPS2PD:
6152                         amd64_sse_cvtps2pd_reg_reg (code, ins->dreg, ins->sreg1);
6153                         break;
6154                 case OP_CVTTPD2DQ:
6155                         amd64_sse_cvttpd2dq_reg_reg (code, ins->dreg, ins->sreg1);
6156                         break;
6157                 case OP_CVTTPS2DQ:
6158                         amd64_sse_cvttps2dq_reg_reg (code, ins->dreg, ins->sreg1);
6159                         break;
6160
6161                 case OP_ICONV_TO_X:
6162                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
6163                         break;
6164                 case OP_EXTRACT_I4:
6165                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6166                         break;
6167                 case OP_EXTRACT_I8:
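                        /* inst_c0 selects the quadword lane: movd only reads the
                         * low 64 bits of an xreg, so the high lane is first copied
                         * down with movhlps into the scratch xreg. */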
6168                         if (ins->inst_c0) {
6169                                 amd64_movhlps_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
6170                                 amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8);
6171                         } else {
6172                                 amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
6173                         }
6174                         break;
6175                 case OP_EXTRACT_I1:
6176                 case OP_EXTRACT_U1:
6177                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6178                         if (ins->inst_c0)
6179                                 amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
6180                         amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
6181                         break;
6182                 case OP_EXTRACT_I2:
6183                 case OP_EXTRACT_U2:
6184                         /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6185                         if (ins->inst_c0)
6186                                 amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
6187                         amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6188                         amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4);
6189                         break;
6190                 case OP_EXTRACT_R8:
6191                         if (ins->inst_c0)
6192                                 amd64_movhlps_reg_reg (code, ins->dreg, ins->sreg1);
6193                         else
6194                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6195                         break;
6196                 case OP_INSERT_I2:
6197                         amd64_sse_pinsrw_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
6198                         break;
6199                 case OP_EXTRACTX_U2:
6200                         amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6201                         break;
6202                 case OP_INSERTX_U1_SLOW:
6203                         /* sreg1 is the extracted ireg (scratch)
6204                          * sreg2 is the ireg to be inserted (scratch)
6205                          * dreg is the xreg to receive the value */
6206
6207                         /*clear the bits from the extracted word*/
6208                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
6209                         /*shift the value to insert if needed*/
6210                         if (ins->inst_c0 & 1)
6211                                 amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4);
6212                         /*join them together*/
6213                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
6214                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
6215                         break;
6216                 case OP_INSERTX_I4_SLOW:
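                        /* "Slow" because pinsrd requires SSE4.1: the 32 bit value is
                         * inserted as two 16 bit halves with pinsrw at word slots
                         * inst_c0 * 2 and inst_c0 * 2 + 1 (sreg2 is clobbered). */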
6217                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
6218                         amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
6219                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
6220                         break;
6221                 case OP_INSERTX_I8_SLOW:
6222                         amd64_movd_xreg_reg_size(code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg2, 8);
6223                         if (ins->inst_c0)
6224                                 amd64_movlhps_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
6225                         else
6226                                 amd64_sse_movsd_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
6227                         break;
6228
6229                 case OP_INSERTX_R4_SLOW:
6230                         switch (ins->inst_c0) {
6231                         case 0:
6232                                 if (cfg->r4fp)
6233                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6234                                 else
6235                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6236                                 break;
6237                         case 1:
6238                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
6239                                 if (cfg->r4fp)
6240                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6241                                 else
6242                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6243                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
6244                                 break;
6245                         case 2:
6246                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
6247                                 if (cfg->r4fp)
6248                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6249                                 else
6250                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6251                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
6252                                 break;
6253                         case 3:
6254                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
6255                                 if (cfg->r4fp)
6256                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6257                                 else
6258                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6259                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
6260                                 break;
6261                         }
6262                         break;
6263                 case OP_INSERTX_R8_SLOW:
6264                         if (ins->inst_c0)
6265                                 amd64_movlhps_reg_reg (code, ins->dreg, ins->sreg2);
6266                         else
6267                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg2);
6268                         break;
6269                 case OP_STOREX_MEMBASE_REG:
6270                 case OP_STOREX_MEMBASE:
6271                         amd64_sse_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
6272                         break;
6273                 case OP_LOADX_MEMBASE:
6274                         amd64_sse_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
6275                         break;
6276                 case OP_LOADX_ALIGNED_MEMBASE:
6277                         amd64_sse_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
6278                         break;
6279                 case OP_STOREX_ALIGNED_MEMBASE_REG:
6280                         amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
6281                         break;
6282                 case OP_STOREX_NTA_MEMBASE_REG:
6283                         amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
6284                         break;
6285                 case OP_PREFETCH_MEMBASE:
6286                         amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
6287                         break;
6288
6289                 case OP_XMOVE:
6290                         /*FIXME the peephole pass should have killed this*/
6291                         if (ins->dreg != ins->sreg1)
6292                                 amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1);
6293                         break;          
6294                 case OP_XZERO:
6295                         amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
6296                         break;
6297                 case OP_ICONV_TO_R4_RAW:
6298                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
6299                         break;
6300
6301                 case OP_FCONV_TO_R8_X:
6302                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6303                         break;
6304
6305                 case OP_XCONV_R8_TO_I4:
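                        /* Truncating double->int32 conversion; the original FCONV
                         * opcode then tells us how to sign- or zero-extend so the
                         * narrow I1/U1/I2/U2 results are exact. */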
6306                         amd64_sse_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6307                         switch (ins->backend.source_opcode) {
6308                         case OP_FCONV_TO_I1:
6309                                 amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
6310                                 break;
6311                         case OP_FCONV_TO_U1:
6312                                 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
6313                                 break;
6314                         case OP_FCONV_TO_I2:
6315                                 amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
6316                                 break;
6317                         case OP_FCONV_TO_U2:
6318                                 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
6319                                 break;
6320                         }                       
6321                         break;
6322
6323                 case OP_EXPAND_I2:
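                        /* Broadcast a 16 bit value: plant it into words 0 and 1, then
                         * pshufd with mask 0 replicates dword 0 across the register. */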
6324                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 0);
6325                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 1);
6326                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
6327                         break;
6328                 case OP_EXPAND_I4:
6329                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
6330                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
6331                         break;
6332                 case OP_EXPAND_I8:
6333                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
6334                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
6335                         break;
6336                 case OP_EXPAND_R4:
6337                         if (cfg->r4fp) {
6338                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6339                         } else {
6340                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6341                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->dreg);
6342                         }
6343                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
6344                         break;
6345                 case OP_EXPAND_R8:
6346                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6347                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
6348                         break;
6349 #endif
6350                 case OP_LIVERANGE_START: {
6351                         if (cfg->verbose_level > 1)
6352                                 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
6353                         MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
6354                         break;
6355                 }
6356                 case OP_LIVERANGE_END: {
6357                         if (cfg->verbose_level > 1)
6358                                 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
6359                         MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
6360                         break;
6361                 }
6362                 case OP_GC_SAFE_POINT: {
6363                         guint8 *br [1];
6364
6365                         g_assert (mono_threads_is_coop_enabled ());
6366
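                        /*
                         * sreg1 points at the thread's polling flag: a nonzero value
                         * means the runtime wants this thread to reach a safe point,
                         * so call mono_threads_state_poll; otherwise fall through.
                         */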
6367                         amd64_test_membase_imm_size (code, ins->sreg1, 0, 1, 4);
6368                         br[0] = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
6369                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_threads_state_poll", FALSE);
6370                         amd64_patch (br[0], code);
6371                         break;
6372                 }
6373
6374                 case OP_GC_LIVENESS_DEF:
6375                 case OP_GC_LIVENESS_USE:
6376                 case OP_GC_PARAM_SLOT_LIVENESS_DEF:
6377                         ins->backend.pc_offset = code - cfg->native_code;
6378                         break;
6379                 case OP_GC_SPILL_SLOT_LIVENESS_DEF:
6380                         ins->backend.pc_offset = code - cfg->native_code;
6381                         bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins);
6382                         break;
6383                 default:
6384                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
6385                         g_assert_not_reached ();
6386                 }
6387
6388                 if ((code - cfg->native_code - offset) > max_len) {
6389                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
6390                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
6391                         g_assert_not_reached ();
6392                 }
6393         }
6394
6395         cfg->code_len = code - cfg->native_code;
6396 }
6397
6398 #endif /* DISABLE_JIT */
6399
6400 void
6401 mono_arch_register_lowlevel_calls (void)
6402 {
6403         /* The signature doesn't matter */
6404         mono_register_jit_icall (mono_amd64_throw_exception, "mono_amd64_throw_exception", mono_create_icall_signature ("void"), TRUE);
6405 }
6406
6407 void
6408 mono_arch_patch_code_new (MonoCompile *cfg, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gpointer target)
6409 {
6410         unsigned char *ip = ji->ip.i + code;
6411
6412         /*
6413          * Debug code to help track down problems where the target of a near call
6414          * is not valid.
6415          */
6416         if (amd64_is_near_call (ip)) {
6417                 gint64 disp = (guint8*)target - (guint8*)ip;
6418
6419                 if (!amd64_is_imm32 (disp)) {
6420                         printf ("TYPE: %d\n", ji->type);
6421                         switch (ji->type) {
6422                         case MONO_PATCH_INFO_INTERNAL_METHOD:
6423                                 printf ("V: %s\n", ji->data.name);
6424                                 break;
6425                         case MONO_PATCH_INFO_METHOD_JUMP:
6426                         case MONO_PATCH_INFO_METHOD:
6427                                 printf ("V: %s\n", ji->data.method->name);
6428                                 break;
6429                         default:
6430                                 break;
6431                         }
6432                 }
6433         }
6434
6435         amd64_patch (ip, (gpointer)target);
6436 }
6437
6438 #ifndef DISABLE_JIT
6439
6440 static int
6441 get_max_epilog_size (MonoCompile *cfg)
6442 {
6443         int max_epilog_size = 16;
6444         
6445         if (cfg->method->save_lmf)
6446                 max_epilog_size += 256;
6447         
6448         if (mono_jit_trace_calls != NULL)
6449                 max_epilog_size += 50;
6450
6451         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
6452                 max_epilog_size += 50;
6453
6454         max_epilog_size += (AMD64_NREG * 2);
6455
6456         return max_epilog_size;
6457 }
6458
6459 /*
6460  * This macro is used for testing whether the unwinder works correctly at every point
6461  * where an async exception can happen.
6462  */
6463 /* This will generate a SIGSEGV at the given point in the code */
6464 #define async_exc_point(code) do { \
6465     if (mono_inject_async_exc_method && mono_method_desc_full_match (mono_inject_async_exc_method, cfg->method)) { \
6466          if (cfg->arch.async_point_count == mono_inject_async_exc_pos) \
6467              amd64_mov_reg_mem (code, AMD64_RAX, 0, 4); \
6468          cfg->arch.async_point_count ++; \
6469     } \
6470 } while (0)
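/*
 * The injected fault is the 4 byte load from absolute address 0 above:
 * dereferencing the unmapped zero page raises SIGSEGV at exactly the
 * chosen async point without perturbing any other state.
 */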
6471
6472 guint8 *
6473 mono_arch_emit_prolog (MonoCompile *cfg)
6474 {
6475         MonoMethod *method = cfg->method;
6476         MonoBasicBlock *bb;
6477         MonoMethodSignature *sig;
6478         MonoInst *ins;
6479         int alloc_size, pos, i, cfa_offset, quad, max_epilog_size, save_area_offset;
6480         guint8 *code;
6481         CallInfo *cinfo;
6482         MonoInst *lmf_var = cfg->lmf_var;
6483         gboolean args_clobbered = FALSE;
6484         gboolean trace = FALSE;
6485
6486         cfg->code_size = MAX (cfg->header->code_size * 4, 1024);
6487
6488         code = cfg->native_code = (unsigned char *)g_malloc (cfg->code_size);
6489
6490         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
6491                 trace = TRUE;
6492
6493         /* Amount of stack space allocated by register saving code */
6494         pos = 0;
6495
6496         /* Offset between RSP and the CFA */
6497         cfa_offset = 0;
6498
6499         /* 
6500          * The prolog consists of the following parts:
6501          * FP present:
6502          * - push rbp; mov rbp, rsp
6503          * - allocate frame
6504          * - save lmf if needed
6505          * - save callee saved regs using moves
6506          * - save rgctx if needed
6507          * FP not present:
6508          * - allocate frame
6509          * - save lmf if needed
6510          * - save callee saved regs using moves
6511          * - save rgctx if needed
6512          */
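        /*
         * Illustrative sketch only (registers, sizes and offsets vary per
         * method): with an FP frame the emitted code is roughly
         *
         *   push %rbp
         *   mov  %rsp, %rbp
         *   sub  $frame_size, %rsp
         *   mov  %rbx, save_offset(%rbp)   ; one move per saved callee reg
         */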
6513
6514         // CFA = sp + 8
6515         cfa_offset = 8;
6516         mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
6517         // IP saved at CFA - 8
6518         mono_emit_unwind_op_offset (cfg, code, AMD64_RIP, -cfa_offset);
6519         async_exc_point (code);
6520         mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF);
6521
6522         if (!cfg->arch.omit_fp) {
6523                 amd64_push_reg (code, AMD64_RBP);
6524                 cfa_offset += 8;
6525                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6526                 mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - cfa_offset);
6527                 async_exc_point (code);
6528 #ifdef TARGET_WIN32
6529                 mono_arch_unwindinfo_add_push_nonvol (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP);
6530 #endif
6531                 /* These are handled automatically by the stack marking code */
6532                 mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF);
6533                 
6534                 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t));
6535                 mono_emit_unwind_op_def_cfa_reg (cfg, code, AMD64_RBP);
6536                 async_exc_point (code);
6537 #ifdef TARGET_WIN32
6538                 mono_arch_unwindinfo_add_set_fpreg (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP);
6539 #endif
6540         }
6541
6542         /* The param area is always at offset 0 from sp */
6543         /* This needs to be allocated here, since it has to come after the spill area */
6544         if (cfg->param_area) {
6545                 if (cfg->arch.omit_fp)
6546                         // FIXME:
6547                         g_assert_not_reached ();
6548                 cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof(mgreg_t));
6549         }
6550
6551         if (cfg->arch.omit_fp) {
6552                 /* 
6553                  * On enter, the stack is misaligned by the pushing of the return
6554                  * address. It is either made aligned by the pushing of %rbp, or by
6555                  * this.
6556                  */
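                /*
                 * Worked example: the call pushed an 8 byte return address, so on
                 * entry rsp % 16 == 8. If the computed alloc_size is a multiple of
                 * 16 (say 32), subtracting it would leave rsp misaligned, hence
                 * the extra 8 bytes of padding below.
                 */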
6557                 alloc_size = ALIGN_TO (cfg->stack_offset, 8);
6558                 if ((alloc_size % 16) == 0) {
6559                         alloc_size += 8;
6560                         /* Mark the padding slot as NOREF */
6561                         mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset - sizeof (mgreg_t), SLOT_NOREF);
6562                 }
6563         } else {
6564                 alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
6565                 if (cfg->stack_offset != alloc_size) {
6566                         /* Mark the padding slot as NOREF */
6567                         mini_gc_set_slot_type_from_fp (cfg, -alloc_size + cfg->param_area, SLOT_NOREF);
6568                 }
6569                 cfg->arch.sp_fp_offset = alloc_size;
6570                 alloc_size -= pos;
6571         }
6572
6573         cfg->arch.stack_alloc_size = alloc_size;
6574
6575         /* Allocate stack frame */
6576         if (alloc_size) {
6577                 /* See mono_emit_stack_alloc */
6578 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
6579                 guint32 remaining_size = alloc_size;
6580                 /*FIXME handle unbounded code expansion, we should use a loop in case of more than X iterations*/
6581                 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 10; /*10 is the max size of amd64_alu_reg_imm + amd64_test_membase_reg*/
6582                 guint32 offset = code - cfg->native_code;
6583                 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
6584                         while (required_code_size >= (cfg->code_size - offset))
6585                                 cfg->code_size *= 2;
6586                         cfg->native_code = (unsigned char *)mono_realloc_native_code (cfg);
6587                         code = cfg->native_code + offset;
6588                         cfg->stat_code_reallocs++;
6589                 }
6590
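                /*
                 * Touch the new stack area one page (0x1000 bytes) at a time so
                 * the OS guard page can grow the stack; a single large sub could
                 * jump past the guard page and fault unrecoverably.
                 */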
6591                 while (remaining_size >= 0x1000) {
6592                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
6593                         if (cfg->arch.omit_fp) {
6594                                 cfa_offset += 0x1000;
6595                                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6596                         }
6597                         async_exc_point (code);
6598 #ifdef TARGET_WIN32
6599                         if (cfg->arch.omit_fp) 
6600                                 mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, 0x1000);
6601 #endif
6602
6603                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
6604                         remaining_size -= 0x1000;
6605                 }
6606                 if (remaining_size) {
6607                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
6608                         if (cfg->arch.omit_fp) {
6609                                 cfa_offset += remaining_size;
6610                                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6611                                 async_exc_point (code);
6612                         }
6613 #ifdef TARGET_WIN32
6614                         if (cfg->arch.omit_fp) 
6615                                 mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, remaining_size);
6616 #endif
6617                 }
6618 #else
6619                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
6620                 if (cfg->arch.omit_fp) {
6621                         cfa_offset += alloc_size;
6622                         mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6623                         async_exc_point (code);
6624                 }
6625 #endif
6626         }
6627
6628         /* Stack alignment check */
6629 #if 0
6630         {
6631                 guint8 *buf;
6632
6633                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
6634                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
6635                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
6636                 buf = code;
6637                 x86_branch8 (code, X86_CC_EQ, 1, FALSE);
6638                 amd64_breakpoint (code);
6639                 amd64_patch (buf, code);
6640         }
6641 #endif
6642
6643         if (mini_get_debug_options ()->init_stacks) {
6644                 /* Fill the stack frame with a dummy value to force deterministic behavior */
6645         
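                /*
                 * The fill is a rep stos of RCX units starting at RDI; RDI and RCX
                 * themselves are parked in the red zone (the 128 bytes below rsp
                 * that the SysV ABI leaves untouched) and restored afterwards.
                 */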
6646                 /* Save registers to the red zone */
6647                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDI, 8);
6648                 amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
6649
6650                 amd64_mov_reg_imm (code, AMD64_RAX, 0x2a2a2a2a2a2a2a2a);
6651                 amd64_mov_reg_imm (code, AMD64_RCX, alloc_size / 8);
6652                 amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RSP, 8);
6653
6654                 amd64_cld (code);
6655                 amd64_prefix (code, X86_REP_PREFIX);
6656                 amd64_stosl (code);
6657
6658                 amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RSP, -8, 8);
6659                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
6660         }
6661
6662         /* Save LMF */
6663         if (method->save_lmf)
6664                 code = emit_setup_lmf (cfg, code, lmf_var->inst_offset, cfa_offset);
6665
6666         /* Save callee saved registers */
6667         if (cfg->arch.omit_fp) {
6668                 save_area_offset = cfg->arch.reg_save_area_offset;
6669                 /* Save callee saved registers after sp is adjusted */
6670                 /* The registers are saved at the bottom of the frame */
6671                 /* FIXME: Optimize this so the regs are saved at the end of the frame in increasing order */
6672         } else {
6673                 /* The registers are saved just below the saved rbp */
6674                 save_area_offset = cfg->arch.reg_save_area_offset;
6675         }
6676
6677         for (i = 0; i < AMD64_NREG; ++i) {
6678                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
6679                         amd64_mov_membase_reg (code, cfg->frame_reg, save_area_offset, i, 8);
6680
6681                         if (cfg->arch.omit_fp) {
6682                                 mono_emit_unwind_op_offset (cfg, code, i, - (cfa_offset - save_area_offset));
6683                                 /* These are handled automatically by the stack marking code */
6684                                 mini_gc_set_slot_type_from_cfa (cfg, - (cfa_offset - save_area_offset), SLOT_NOREF);
6685                         } else {
6686                                 mono_emit_unwind_op_offset (cfg, code, i, - (-save_area_offset + (2 * 8)));
6687                                 // FIXME: GC
6688                         }
6689
6690                         save_area_offset += 8;
6691                         async_exc_point (code);
6692                 }
6693         }
6694
6695         /* store runtime generic context */
6696         if (cfg->rgctx_var) {
6697                 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
6698                                 (cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP));
6699
6700                 amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, sizeof(gpointer));
6701
6702                 mono_add_var_location (cfg, cfg->rgctx_var, TRUE, MONO_ARCH_RGCTX_REG, 0, 0, code - cfg->native_code);
6703                 mono_add_var_location (cfg, cfg->rgctx_var, FALSE, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, code - cfg->native_code, 0);
6704         }
6705
6706         /* compute max_length in order to use short forward jumps */
6707         max_epilog_size = get_max_epilog_size (cfg);
6708         if (cfg->opt & MONO_OPT_BRANCH) {
6709                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
6710                         MonoInst *ins;
6711                         int max_length = 0;
6712
6713                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
6714                                 max_length += 6;
6715                         /* max alignment for loops */
6716                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
6717                                 max_length += LOOP_ALIGNMENT;
6718
6719                         MONO_BB_FOR_EACH_INS (bb, ins) {
6720                                 max_length += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
6721                         }
6722
6723                         /* Take prolog and epilog instrumentation into account */
6724                         if (bb == cfg->bb_entry || bb == cfg->bb_exit)
6725                                 max_length += max_epilog_size;
6726                         
6727                         bb->max_length = max_length;
6728                 }
6729         }
6730
6731         sig = mono_method_signature (method);
6732         pos = 0;
6733
6734         cinfo = (CallInfo *)cfg->arch.cinfo;
6735
6736         if (sig->ret->type != MONO_TYPE_VOID) {
6737                 /* Save volatile arguments to the stack */
6738                 if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
6739                         amd64_mov_membase_reg (code, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, cinfo->ret.reg, 8);
6740         }
6741
6742         /* Keep this in sync with emit_load_volatile_arguments */
6743         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
6744                 ArgInfo *ainfo = cinfo->args + i;
6745
6746                 ins = cfg->args [i];
6747
6748                 if ((ins->flags & MONO_INST_IS_DEAD) && !trace)
6749                         /* Unused arguments */
6750                         continue;
6751
6752                 /* Save volatile arguments to the stack */
6753                 if (ins->opcode != OP_REGVAR) {
6754                         switch (ainfo->storage) {
6755                         case ArgInIReg: {
6756                                 guint32 size = 8;
6757
6758                                 /* FIXME: I1 etc */
6759                                 /*
6760                                 if (stack_offset & 0x1)
6761                                         size = 1;
6762                                 else if (stack_offset & 0x2)
6763                                         size = 2;
6764                                 else if (stack_offset & 0x4)
6765                                         size = 4;
6766                                 else
6767                                         size = 8;
6768                                 */
6769                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, size);
6770
6771                                 /*
6772                                  * Save the original location of 'this',
6773                                  * get_generic_info_from_stack_frame () needs this to properly look up
6774                                  * the argument value during the handling of async exceptions.
6775                                  */
6776                                 if (ins == cfg->args [0]) {
6777                                         mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
6778                                         mono_add_var_location (cfg, ins, FALSE, ins->inst_basereg, ins->inst_offset, code - cfg->native_code, 0);
6779                                 }
6780                                 break;
6781                         }
6782                         case ArgInFloatSSEReg:
6783                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
6784                                 break;
6785                         case ArgInDoubleSSEReg:
6786                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
6787                                 break;
6788                         case ArgValuetypeInReg:
6789                                 for (quad = 0; quad < 2; quad ++) {
6790                                         switch (ainfo->pair_storage [quad]) {
6791                                         case ArgInIReg:
6792                                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad], sizeof(mgreg_t));
6793                                                 break;
6794                                         case ArgInFloatSSEReg:
6795                                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]);
6796                                                 break;
6797                                         case ArgInDoubleSSEReg:
6798                                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]);
6799                                                 break;
6800                                         case ArgNone:
6801                                                 break;
6802                                         default:
6803                                                 g_assert_not_reached ();
6804                                         }
6805                                 }
6806                                 break;
6807                         case ArgValuetypeAddrInIReg:
6808                                 if (ainfo->pair_storage [0] == ArgInIReg)
6809                                         amd64_mov_membase_reg (code, ins->inst_left->inst_basereg, ins->inst_left->inst_offset, ainfo->pair_regs [0],  sizeof (gpointer));
6810                                 break;
6811                         case ArgGSharedVtInReg:
6812                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, 8);
6813                                 break;
6814                         default:
6815                                 break;
6816                         }
6817                 } else {
6818                         /* Argument allocated to (non-volatile) register */
6819                         switch (ainfo->storage) {
6820                         case ArgInIReg:
6821                                 amd64_mov_reg_reg (code, ins->dreg, ainfo->reg, 8);
6822                                 break;
6823                         case ArgOnStack:
6824                                 amd64_mov_reg_membase (code, ins->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
6825                                 break;
6826                         default:
6827                                 g_assert_not_reached ();
6828                         }
6829
6830                         if (ins == cfg->args [0]) {
6831                                 mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
6832                                 mono_add_var_location (cfg, ins, TRUE, ins->dreg, 0, code - cfg->native_code, 0);
6833                         }
6834                 }
6835         }
6836
6837         if (cfg->method->save_lmf)
6838                 args_clobbered = TRUE;
6839
6840         if (trace) {
6841                 args_clobbered = TRUE;
6842                 code = (guint8 *)mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
6843         }
6844
6845         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
6846                 args_clobbered = TRUE;
6847
6848         /*
6849          * Optimize the common case of the first bblock making a call with the same
6850          * arguments as the method. This works because the arguments are still in their
6851          * original argument registers.
6852          * FIXME: Generalize this
6853          */
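        /*
         * For example, in "int Foo (int a) { return Bar (a); }" the load of
         * 'a' in the first bblock can be nullified (or turned into a reg-reg
         * move) because 'a' is still live in its incoming argument register.
         */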
6854         if (!args_clobbered) {
6855                 MonoBasicBlock *first_bb = cfg->bb_entry;
6856                 MonoInst *next;
6857                 int filter = FILTER_IL_SEQ_POINT;
6858
6859                 next = mono_bb_first_inst (first_bb, filter);
6860                 if (!next && first_bb->next_bb) {
6861                         first_bb = first_bb->next_bb;
6862                         next = mono_bb_first_inst (first_bb, filter);
6863                 }
6864
6865                 if (first_bb->in_count > 1)
6866                         next = NULL;
6867
6868                 for (i = 0; next && i < sig->param_count + sig->hasthis; ++i) {
6869                         ArgInfo *ainfo = cinfo->args + i;
6870                         gboolean match = FALSE;
6871
6872                         ins = cfg->args [i];
6873                         if (ins->opcode != OP_REGVAR) {
6874                                 switch (ainfo->storage) {
6875                                 case ArgInIReg: {
6876                                         if (((next->opcode == OP_LOAD_MEMBASE) || (next->opcode == OP_LOADI4_MEMBASE)) && next->inst_basereg == ins->inst_basereg && next->inst_offset == ins->inst_offset) {
6877                                                 if (next->dreg == ainfo->reg) {
6878                                                         NULLIFY_INS (next);
6879                                                         match = TRUE;
6880                                                 } else {
6881                                                         next->opcode = OP_MOVE;
6882                                                         next->sreg1 = ainfo->reg;
6883                                                         /* Only continue if the instruction doesn't change argument regs */
6884                                                         if (next->dreg == ainfo->reg || next->dreg == AMD64_RAX)
6885                                                                 match = TRUE;
6886                                                 }
6887                                         }
6888                                         break;
6889                                 }
6890                                 default:
6891                                         break;
6892                                 }
6893                         } else {
6894                                 /* Argument allocated to (non-volatile) register */
6895                                 switch (ainfo->storage) {
6896                                 case ArgInIReg:
6897                                         if (next->opcode == OP_MOVE && next->sreg1 == ins->dreg && next->dreg == ainfo->reg) {
6898                                                 NULLIFY_INS (next);
6899                                                 match = TRUE;
6900                                         }
6901                                         break;
6902                                 default:
6903                                         break;
6904                                 }
6905                         }
6906
6907                         if (match) {
6908                                 next = mono_inst_next (next, filter);
6909                                 //next = mono_inst_list_next (&next->node, &first_bb->ins_list);
6910                                 if (!next)
6911                                         break;
6912                         }
6913                 }
6914         }
6915
6916         if (cfg->gen_sdb_seq_points) {
6917                 MonoInst *info_var = (MonoInst *)cfg->arch.seq_point_info_var;
6918
6919                 /* Initialize seq_point_info_var */
6920                 if (cfg->compile_aot) {
6921                         /* Initialize the variable from a GOT slot */
6922                         /* Same as OP_AOTCONST */
6923                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_SEQ_POINT_INFO, cfg->method);
6924                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, sizeof(gpointer));
6925                         g_assert (info_var->opcode == OP_REGOFFSET);
6926                         amd64_mov_membase_reg (code, info_var->inst_basereg, info_var->inst_offset, AMD64_R11, 8);
6927                 }
6928
6929                 if (cfg->compile_aot) {
6930                         /* Initialize ss_tramp_var */
6931                         ins = (MonoInst *)cfg->arch.ss_tramp_var;
6932                         g_assert (ins->opcode == OP_REGOFFSET);
6933
6934                         amd64_mov_reg_membase (code, AMD64_R11, info_var->inst_basereg, info_var->inst_offset, 8);
6935                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr), 8);
6936                         amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
6937                 } else {
6938                         /* Initialize ss_tramp_var */
6939                         ins = (MonoInst *)cfg->arch.ss_tramp_var;
6940                         g_assert (ins->opcode == OP_REGOFFSET);
6941
6942                         amd64_mov_reg_imm (code, AMD64_R11, (guint64)&ss_trampoline);
6943                         amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
6944
6945                         /* Initialize bp_tramp_var */
6946                         ins = (MonoInst *)cfg->arch.bp_tramp_var;
6947                         g_assert (ins->opcode == OP_REGOFFSET);
6948
6949                         amd64_mov_reg_imm (code, AMD64_R11, (guint64)&bp_trampoline);
6950                         amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
6951                 }
6952         }
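        /*
         * Sketch of what the AOT branch above emits (operands illustrative):
         *
         *     mov r11, [rip + 0]          ; patched to the SEQ_POINT_INFO GOT slot
         *     mov [fp + info_var], r11    ; cache the SeqPointInfo*
         *     mov r11, [r11 + MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr)]
         *     mov [fp + ss_tramp_var], r11
         *
         * The JIT branch instead stores the addresses of the global
         * ss_trampoline/bp_trampoline variables into the two stack slots.
         */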
6953
6954         cfg->code_len = code - cfg->native_code;
6955
6956         g_assert (cfg->code_len < cfg->code_size);
6957
6958         return code;
6959 }
6960
6961 void
6962 mono_arch_emit_epilog (MonoCompile *cfg)
6963 {
6964         MonoMethod *method = cfg->method;
6965         int quad, i;
6966         guint8 *code;
6967         int max_epilog_size;
6968         CallInfo *cinfo;
6969         gint32 lmf_offset = cfg->lmf_var ? ((MonoInst*)cfg->lmf_var)->inst_offset : -1;
6970         gint32 save_area_offset = cfg->arch.reg_save_area_offset;
6971
6972         max_epilog_size = get_max_epilog_size (cfg);
6973
6974         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
6975                 cfg->code_size *= 2;
6976                 cfg->native_code = (unsigned char *)mono_realloc_native_code (cfg);
6977                 cfg->stat_code_reallocs++;
6978         }
6979         code = cfg->native_code + cfg->code_len;
6980
6981         cfg->has_unwind_info_for_epilog = TRUE;
6982
6983         /* Mark the start of the epilog */
6984         mono_emit_unwind_op_mark_loc (cfg, code, 0);
6985
6986         /* Save the unwind state which is needed by the out-of-line code */
6987         mono_emit_unwind_op_remember_state (cfg, code);
6988
6989         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
6990                 code = (guint8 *)mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
6991
6992         /* the code restoring the registers must be kept in sync with OP_TAILCALL */
6993         
6994         if (method->save_lmf) {
6995                 /* check if we need to restore protection of the stack after a stack overflow */
6996                 if (!cfg->compile_aot && mono_get_jit_tls_offset () != -1) {
6997                         guint8 *patch;
6998                         code = mono_amd64_emit_tls_get (code, AMD64_RCX, mono_get_jit_tls_offset ());
6999                         /* we load the value in a separate instruction: this mechanism may be
7000                          * used later as a safer way to do thread interruption
7001                          */
7002                         amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RCX, MONO_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 8);
7003                         x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
7004                         patch = code;
7005                         x86_branch8 (code, X86_CC_Z, 0, FALSE);
7006                         /* note that the call trampoline will preserve eax/edx */
7007                         x86_call_reg (code, X86_ECX);
7008                         x86_patch (patch, code);
7009                 } else {
7010                         /* FIXME: maybe save the jit tls in the prolog */
7011                 }
7012                 if (cfg->used_int_regs & (1 << AMD64_RBP)) {
7013                         amd64_mov_reg_membase (code, AMD64_RBP, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), 8);
7014                 }
7015         }
7016
7017         /* Restore callee saved regs */
7018         for (i = 0; i < AMD64_NREG; ++i) {
7019                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
7020                         /* Restore only used_int_regs, not arch.saved_iregs */
7021                         if (cfg->used_int_regs & (1 << i)) {
7022                                 amd64_mov_reg_membase (code, i, cfg->frame_reg, save_area_offset, 8);
7023                                 mono_emit_unwind_op_same_value (cfg, code, i);
7024                                 async_exc_point (code);
7025                         }
7026                         save_area_offset += 8;
7027                 }
7028         }
7029
7030         /* Load returned vtypes into registers if needed */
7031         cinfo = (CallInfo *)cfg->arch.cinfo;
7032         if (cinfo->ret.storage == ArgValuetypeInReg) {
7033                 ArgInfo *ainfo = &cinfo->ret;
7034                 MonoInst *inst = cfg->ret;
7035
7036                 for (quad = 0; quad < 2; quad ++) {
7037                         switch (ainfo->pair_storage [quad]) {
7038                         case ArgInIReg:
7039                                 amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_size [quad]);
7040                                 break;
7041                         case ArgInFloatSSEReg:
7042                                 amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)));
7043                                 break;
7044                         case ArgInDoubleSSEReg:
7045                                 amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)));
7046                                 break;
7047                         case ArgNone:
7048                                 break;
7049                         default:
7050                                 g_assert_not_reached ();
7051                         }
7052                 }
7053         }
7054
7055         if (cfg->arch.omit_fp) {
7056                 if (cfg->arch.stack_alloc_size) {
7057                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
7058                 }
7059         } else {
7060                 amd64_leave (code);
7061                 mono_emit_unwind_op_same_value (cfg, code, AMD64_RBP);
7062         }
7063         mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
7064         async_exc_point (code);
7065         amd64_ret (code);
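        /*
         * For a frame-pointer method the restores above add up to an epilog of
         * roughly this shape (registers and offsets illustrative):
         *
         *     mov rbx, [rbp - 0x10]    ; used_int_regs restores
         *     mov r12, [rbp - 0x08]
         *     leave                    ; mov rsp, rbp; pop rbp
         *     ret
         */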
7066
7067         /* Restore the unwind state to be the same as before the epilog */
7068         mono_emit_unwind_op_restore_state (cfg, code);
7069
7070         cfg->code_len = code - cfg->native_code;
7071
7072         g_assert (cfg->code_len < cfg->code_size);
7073 }
7074
7075 void
7076 mono_arch_emit_exceptions (MonoCompile *cfg)
7077 {
7078         MonoJumpInfo *patch_info;
7079         int nthrows, i;
7080         guint8 *code;
7081         MonoClass *exc_classes [16];
7082         guint8 *exc_throw_start [16], *exc_throw_end [16];
7083         guint32 code_size = 0;
7084
7085         /* Compute needed space */
7086         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
7087                 if (patch_info->type == MONO_PATCH_INFO_EXC)
7088                         code_size += 40;
7089                 if (patch_info->type == MONO_PATCH_INFO_R8)
7090                         code_size += 8 + 15; /* sizeof (double) + alignment */
7091                 if (patch_info->type == MONO_PATCH_INFO_R4)
7092                         code_size += 4 + 15; /* sizeof (float) + alignment */
7093                 if (patch_info->type == MONO_PATCH_INFO_GC_CARD_TABLE_ADDR)
7094                         code_size += 8 + 7; /* sizeof (void*) + alignment */
7095         }
7096
7097         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
7098                 cfg->code_size *= 2;
7099                 cfg->native_code = (unsigned char *)mono_realloc_native_code (cfg);
7100                 cfg->stat_code_reallocs++;
7101         }
7102
7103         code = cfg->native_code + cfg->code_len;
7104
7105         /* add code to raise exceptions */
7106         nthrows = 0;
7107         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
7108                 switch (patch_info->type) {
7109                 case MONO_PATCH_INFO_EXC: {
7110                         MonoClass *exc_class;
7111                         guint8 *buf, *buf2;
7112                         guint32 throw_ip;
7113
7114                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
7115
7116                         exc_class = mono_class_load_from_name (mono_defaults.corlib, "System", patch_info->data.name);
7117                         throw_ip = patch_info->ip.i;
7118
7119                         //x86_breakpoint (code);
7120                         /* Find a throw sequence for the same exception class */
7121                         for (i = 0; i < nthrows; ++i)
7122                                 if (exc_classes [i] == exc_class)
7123                                         break;
7124                         if (i < nthrows) {
7125                                 amd64_mov_reg_imm (code, AMD64_ARG_REG2, (exc_throw_end [i] - cfg->native_code) - throw_ip);
7126                                 x86_jump_code (code, exc_throw_start [i]);
7127                                 patch_info->type = MONO_PATCH_INFO_NONE;
7128                         }
7129                         else {
7130                                 buf = code;
7131                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG2, 0xf0f0f0f0, 4);
7132                                 buf2 = code;
7133
7134                                 if (nthrows < 16) {
7135                                         exc_classes [nthrows] = exc_class;
7136                                         exc_throw_start [nthrows] = code;
7137                                 }
7138                                 amd64_mov_reg_imm (code, AMD64_ARG_REG1, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
7139
7140                                 patch_info->type = MONO_PATCH_INFO_NONE;
7141
7142                                 code = emit_call_body (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_arch_throw_corlib_exception");
7143
7144                                 amd64_mov_reg_imm (buf, AMD64_ARG_REG2, (code - cfg->native_code) - throw_ip);
7145                                 while (buf < buf2)
7146                                         x86_nop (buf);
7147
7148                                 if (nthrows < 16) {
7149                                         exc_throw_end [nthrows] = code;
7150                                         nthrows ++;
7151                                 }
7152                         }
7153                         break;
7154                 }
7155                 default:
7156                         /* do nothing */
7157                         break;
7158                 }
7159                 g_assert(code < cfg->native_code + cfg->code_size);
7160         }
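        /*
         * Sketch of one merged throw sequence emitted above (immediates are
         * illustrative):
         *
         *     mov esi, <ip offset>     ; AMD64_ARG_REG2, patched after the fact
         *     mov edi, <type token>    ; AMD64_ARG_REG1
         *     call mono_arch_throw_corlib_exception
         *
         * Later throw sites for the same exception class load their own
         * ARG_REG2 value and jump to the ARG_REG1 mov, which is why the
         * ARG_REG2 mov is emitted first with a placeholder and nop-padded once
         * its real length is known.
         */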
7161
7162         /* Handle relocations with RIP relative addressing */
7163         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
7164                 gboolean remove = FALSE;
7165                 guint8 *orig_code = code;
7166
7167                 switch (patch_info->type) {
7168                 case MONO_PATCH_INFO_R8:
7169                 case MONO_PATCH_INFO_R4: {
7170                         guint8 *pos, *patch_pos;
7171                         guint32 target_pos;
7172
7173                         /* The SSE opcodes require a 16 byte alignment */
7174                         code = (guint8*)ALIGN_TO (code, 16);
7175
7176                         pos = cfg->native_code + patch_info->ip.i;
7177                         if (IS_REX (pos [1])) {
7178                                 patch_pos = pos + 5;
7179                                 target_pos = code - pos - 9;
7180                         }
7181                         else {
7182                                 patch_pos = pos + 4;
7183                                 target_pos = code - pos - 8;
7184                         }
7185
7186                         if (patch_info->type == MONO_PATCH_INFO_R8) {
7187                                 *(double*)code = *(double*)patch_info->data.target;
7188                                 code += sizeof (double);
7189                         } else {
7190                                 *(float*)code = *(float*)patch_info->data.target;
7191                                 code += sizeof (float);
7192                         }
7193
7194                         *(guint32*)(patch_pos) = target_pos;
7195
7196                         remove = TRUE;
7197                         break;
7198                 }
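                /*
                 * Resulting layout, roughly (offsets illustrative): the load at
                 * patch_info->ip uses RIP-relative addressing, e.g.
                 *
                 *     movsd xmm0, [rip + disp32]   ; disp32 lives at patch_pos
                 *     ...
                 *     .align 16
                 *     .quad <double bits>          ; the constant emitted at 'code'
                 *
                 * and disp32 is rewritten above so the load reaches the constant.
                 */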
7199                 case MONO_PATCH_INFO_GC_CARD_TABLE_ADDR: {
7200                         guint8 *pos;
7201
7202                         if (cfg->compile_aot)
7203                                 continue;
7204
7205                         /* Loading is faster from aligned addresses. */
7206                         code = (guint8*)ALIGN_TO (code, 8);
7207                         memset (orig_code, 0, code - orig_code);
7208
7209                         pos = cfg->native_code + patch_info->ip.i;
7210
7211                         /* alu_op [rex] modr/m imm32 - 7 or 8 bytes */
7212                         if (IS_REX (pos [1]))
7213                                 *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
7214                         else
7215                                 *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
7216
7217                         *(gpointer*)code = (gpointer)patch_info->data.target;
7218                         code += sizeof (gpointer);
7219
7220                         remove = TRUE;
7221                         break;
7222                 }
7223                 default:
7224                         break;
7225                 }
7226
7227                 if (remove) {
7228                         if (patch_info == cfg->patch_info)
7229                                 cfg->patch_info = patch_info->next;
7230                         else {
7231                                 MonoJumpInfo *tmp;
7232
7233                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
7234                                         ;
7235                                 tmp->next = patch_info->next;
7236                         }
7237                 }
7238                 g_assert (code < cfg->native_code + cfg->code_size);
7239         }
7240
7241         cfg->code_len = code - cfg->native_code;
7242
7243         g_assert (cfg->code_len < cfg->code_size);
7244
7245 }
7246
7247 #endif /* DISABLE_JIT */
7248
7249 void*
7250 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
7251 {
7252         guchar *code = (guchar *)p;
7253         MonoMethodSignature *sig;
7254         MonoInst *inst;
7255         int i, n, stack_area = 0;
7256
7257         /* Keep this in sync with mono_arch_get_argument_info */
7258
7259         if (enable_arguments) {
7260                 /* Allocate a new area on the stack and save arguments there */
7261                 sig = mono_method_signature (cfg->method);
7262
7263                 n = sig->param_count + sig->hasthis;
7264
7265                 stack_area = ALIGN_TO (n * 8, 16);
7266
7267                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
7268
7269                 for (i = 0; i < n; ++i) {
7270                         inst = cfg->args [i];
7271
7272                         if (inst->opcode == OP_REGVAR)
7273                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
7274                         else {
7275                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
7276                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
7277                         }
7278                 }
7279         }
7280
7281         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
7282         amd64_set_reg_template (code, AMD64_ARG_REG1);
7283         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RSP, 8);
7284         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
7285
7286         if (enable_arguments)
7287                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
7288
7289         return code;
7290 }
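/*
 * Illustrative stack layout while the call to 'func' above is live, assuming
 * two register-sized arguments (offsets relative to the adjusted RSP):
 *
 *     [rsp + 0]   arg 0 (this)
 *     [rsp + 8]   arg 1
 *
 * ARG_REG1 receives the patched MonoMethod* and ARG_REG2 points at this save
 * area, so the tracing function can inspect the raw argument values.
 */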
7291
7292 enum {
7293         SAVE_NONE,
7294         SAVE_STRUCT,
7295         SAVE_EAX,
7296         SAVE_EAX_EDX,
7297         SAVE_XMM
7298 };
7299
7300 void*
7301 mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
7302 {
7303         guchar *code = (guchar *)p;
7304         int save_mode = SAVE_NONE;
7305         MonoMethod *method = cfg->method;
7306         MonoType *ret_type = mini_get_underlying_type (mono_method_signature (method)->ret);
7307         int i;
7308         
7309         switch (ret_type->type) {
7310         case MONO_TYPE_VOID:
7311                 /* special case string .ctor icall */
7312                 if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
7313                         save_mode = SAVE_EAX;
7314                 else
7315                         save_mode = SAVE_NONE;
7316                 break;
7317         case MONO_TYPE_I8:
7318         case MONO_TYPE_U8:
7319                 save_mode = SAVE_EAX;
7320                 break;
7321         case MONO_TYPE_R4:
7322         case MONO_TYPE_R8:
7323                 save_mode = SAVE_XMM;
7324                 break;
7325         case MONO_TYPE_GENERICINST:
7326                 if (!mono_type_generic_inst_is_valuetype (ret_type)) {
7327                         save_mode = SAVE_EAX;
7328                         break;
7329                 }
7330                 /* Fall through */
7331         case MONO_TYPE_VALUETYPE:
7332                 save_mode = SAVE_STRUCT;
7333                 break;
7334         default:
7335                 save_mode = SAVE_EAX;
7336                 break;
7337         }
7338
7339         /* Save the result and copy it into the proper argument register */
7340         switch (save_mode) {
7341         case SAVE_EAX:
7342                 amd64_push_reg (code, AMD64_RAX);
7343                 /* Align stack */
7344                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
7345                 if (enable_arguments)
7346                         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RAX, 8);
7347                 break;
7348         case SAVE_STRUCT:
7349                 /* FIXME: */
7350                 if (enable_arguments)
7351                         amd64_mov_reg_imm (code, AMD64_ARG_REG2, 0);
7352                 break;
7353         case SAVE_XMM:
7354                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
7355                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
7356                 /* Align stack */
7357                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
7358                 /* 
7359                  * The result is already in the proper argument register so no copying
7360                  * needed.
7361                  */
7362                 break;
7363         case SAVE_NONE:
7364                 break;
7365         default:
7366                 g_assert_not_reached ();
7367         }
7368
7369         /* Set %al since this is a varargs call */
7370         if (save_mode == SAVE_XMM)
7371                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
7372         else
7373                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
7374
7375         if (preserve_argument_registers) {
7376                 for (i = 0; i < PARAM_REGS; ++i)
7377                         amd64_push_reg (code, param_regs [i]);
7378         }
7379
7380         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
7381         amd64_set_reg_template (code, AMD64_ARG_REG1);
7382         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
7383
7384         if (preserve_argument_registers) {
7385                 for (i = PARAM_REGS - 1; i >= 0; --i)
7386                         amd64_pop_reg (code, param_regs [i]);
7387         }
7388
7389         /* Restore result */
7390         switch (save_mode) {
7391         case SAVE_EAX:
7392                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
7393                 amd64_pop_reg (code, AMD64_RAX);
7394                 break;
7395         case SAVE_STRUCT:
7396                 /* FIXME: */
7397                 break;
7398         case SAVE_XMM:
7399                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
7400                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
7401                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
7402                 break;
7403         case SAVE_NONE:
7404                 break;
7405         default:
7406                 g_assert_not_reached ();
7407         }
7408
7409         return code;
7410 }
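/*
 * Note on the alignment adjustments above: each save mode moves RSP by a
 * multiple of 16 in total (saved value plus padding), so whatever 16-byte
 * alignment RSP had before the save is preserved at the call to 'func', as
 * the SysV ABI requires at call sites.
 */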
7411
7412 void
7413 mono_arch_flush_icache (guint8 *code, gint size)
7414 {
7415         /* Not needed */
7416 }
7417
7418 void
7419 mono_arch_flush_register_windows (void)
7420 {
7421 }
7422
7423 gboolean 
7424 mono_arch_is_inst_imm (gint64 imm)
7425 {
7426         return amd64_use_imm32 (imm);
7427 }
7428
7429 /*
7430  * Determine whether the trap whose info is in SIGINFO is caused by
7431  * integer overflow.
7432  */
7433 gboolean
7434 mono_arch_is_int_overflow (void *sigctx, void *info)
7435 {
7436         MonoContext ctx;
7437         guint8* rip;
7438         int reg;
7439         gint64 value;
7440
7441         mono_sigctx_to_monoctx (sigctx, &ctx);
7442
7443         rip = (guint8*)ctx.gregs [AMD64_RIP];
7444
7445         if (IS_REX (rip [0])) {
7446                 reg = amd64_rex_b (rip [0]);
7447                 rip ++;
7448         }
7449         else
7450                 reg = 0;
7451
7452         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
7453                 /* idiv REG */
7454                 reg += x86_modrm_rm (rip [1]);
7455
7456                 value = ctx.gregs [reg];
7457
7458                 if (value == -1)
7459                         return TRUE;
7460         }
7461
7462         return FALSE;
7463 }
7464
7465 guint32
7466 mono_arch_get_patch_offset (guint8 *code)
7467 {
7468         return 3;
7469 }
7470
7471 /**
7472  * mono_breakpoint_clean_code:
7473  *
7474  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
7475  * breakpoints in the original code, they are removed in the copy.
7476  *
7477  * Returns TRUE if no sw breakpoint was present.
7478  */
7479 gboolean
7480 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
7481 {
7482         /*
7483          * If method_start is non-NULL we need to perform bounds checks, since accessing memory
7484          * at code - offset could go before the start of the method and end up in a different
7485          * page of memory that is not mapped, or read incorrect data. We zero-fill the missing
7486          * bytes instead.
7487          */
7488         if (!method_start || code - offset >= method_start) {
7489                 memcpy (buf, code - offset, size);
7490         } else {
7491                 int diff = code - method_start;
7492                 memset (buf, 0, size);
7493                 memcpy (buf + offset - diff, method_start, diff + size - offset);
7494         }
7495         return TRUE;
7496 }
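/*
 * Worked example (addresses hypothetical): with method_start = 0x1000,
 * code = 0x1002, offset = 4 and size = 8, copying from code - offset would
 * read 0xffe-0xfff, which may be unmapped. Here diff = 2, so buf [0..1] stay
 * zeroed and buf [2..7] receive the diff + size - offset = 6 valid bytes
 * starting at method_start.
 */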
7497
7498 int
7499 mono_arch_get_this_arg_reg (guint8 *code)
7500 {
7501         return AMD64_ARG_REG1;
7502 }
7503
7504 gpointer
7505 mono_arch_get_this_arg_from_call (mgreg_t *regs, guint8 *code)
7506 {
7507         return (gpointer)regs [mono_arch_get_this_arg_reg (code)];
7508 }
7509
7510 #define MAX_ARCH_DELEGATE_PARAMS 10
7511
7512 static gpointer
7513 get_delegate_invoke_impl (MonoTrampInfo **info, gboolean has_target, guint32 param_count)
7514 {
7515         guint8 *code, *start;
7516         GSList *unwind_ops = NULL;
7517         int i;
7518
7519         unwind_ops = mono_arch_get_cie_program ();
7520
7521         if (has_target) {
7522                 start = code = (guint8 *)mono_global_codeman_reserve (64);
7523
7524                 /* Replace the this argument with the target */
7525                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7526                 amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, target), 8);
7527                 amd64_jump_membase (code, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
7528
7529                 g_assert ((code - start) < 64);
7530         } else {
7531                 start = code = (guint8 *)mono_global_codeman_reserve (64);
7532
7533                 if (param_count == 0) {
7534                         amd64_jump_membase (code, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
7535                 } else {
7536                         /* We have to shift the arguments left */
7537                         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7538                         for (i = 0; i < param_count; ++i) {
7539 #ifdef TARGET_WIN32
7540                                 if (i < 3)
7541                                         amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
7542                                 else
7543                                         amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
7544 #else
7545                                 amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
7546 #endif
7547                         }
7548
7549                         amd64_jump_membase (code, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
7550                 }
7551                 g_assert ((code - start) < 64);
7552         }
7553
7554         mono_arch_flush_icache (start, code - start);
7555
7556         if (has_target) {
7557                 *info = mono_tramp_info_create ("delegate_invoke_impl_has_target", start, code - start, NULL, unwind_ops);
7558         } else {
7559                 char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", param_count);
7560                 *info = mono_tramp_info_create (name, start, code - start, NULL, unwind_ops);
7561                 g_free (name);
7562         }
7563
7564         if (mono_jit_map_is_enabled ()) {
7565                 char *buff;
7566                 if (has_target)
7567                         buff = (char*)"delegate_invoke_has_target";
7568                 else
7569                         buff = g_strdup_printf ("delegate_invoke_no_target_%d", param_count);
7570                 mono_emit_jit_tramp (start, code - start, buff);
7571                 if (!has_target)
7572                         g_free (buff);
7573         }
7574         mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
7575
7576         return start;
7577 }
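/*
 * Sketch of the has_target thunk built above, assuming SysV argument
 * registers (TARGET_WIN32 uses different ones):
 *
 *     mov rax, rdi                                       ; save the delegate
 *     mov rdi, [rax + MONO_STRUCT_OFFSET (MonoDelegate, target)]
 *     jmp [rax + MONO_STRUCT_OFFSET (MonoDelegate, method_ptr)]
 *
 * The no-target variant instead shifts every argument register down by one
 * position before the indirect jump, since ARG_REG1 was occupied by the
 * delegate itself.
 */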
7578
7579 #define MAX_VIRTUAL_DELEGATE_OFFSET 32
7580
7581 static gpointer
7582 get_delegate_virtual_invoke_impl (MonoTrampInfo **info, gboolean load_imt_reg, int offset)
7583 {
7584         guint8 *code, *start;
7585         int size = 20;
7586         char *tramp_name;
7587         GSList *unwind_ops;
7588
7589         if (offset / (int)sizeof (gpointer) > MAX_VIRTUAL_DELEGATE_OFFSET)
7590                 return NULL;
7591
7592         start = code = (guint8 *)mono_global_codeman_reserve (size);
7593
7594         unwind_ops = mono_arch_get_cie_program ();
7595
7596         /* Replace the this argument with the target */
7597         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7598         amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, target), 8);
7599
7600         if (load_imt_reg) {
7601                 /* Load the IMT reg */
7602                 amd64_mov_reg_membase (code, MONO_ARCH_IMT_REG, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method), 8);
7603         }
7604
7605         /* Load the vtable */
7606         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoObject, vtable), 8);
7607         amd64_jump_membase (code, AMD64_RAX, offset);
7608         mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
7609
7610         if (load_imt_reg)
7611                 tramp_name = g_strdup_printf ("delegate_virtual_invoke_imt_%d", - offset / sizeof (gpointer));
7612         else
7613                 tramp_name = g_strdup_printf ("delegate_virtual_invoke_%d", offset / sizeof (gpointer));
7614         *info = mono_tramp_info_create (tramp_name, start, code - start, NULL, unwind_ops);
7615         g_free (tramp_name);
7616
7617         return start;
7618 }
7619
7620 /*
7621  * mono_arch_get_delegate_invoke_impls:
7622  *
7623  *   Return a list of MonoTrampInfo structures for the delegate invoke impl
7624  * trampolines.
7625  */
7626 GSList*
7627 mono_arch_get_delegate_invoke_impls (void)
7628 {
7629         GSList *res = NULL;
7630         MonoTrampInfo *info;
7631         int i;
7632
7633         get_delegate_invoke_impl (&info, TRUE, 0);
7634         res = g_slist_prepend (res, info);
7635
7636         for (i = 0; i <= MAX_ARCH_DELEGATE_PARAMS; ++i) {
7637                 get_delegate_invoke_impl (&info, FALSE, i);
7638                 res = g_slist_prepend (res, info);
7639         }
7640
7641         for (i = 0; i <= MAX_VIRTUAL_DELEGATE_OFFSET; ++i) {
7642                 get_delegate_virtual_invoke_impl (&info, TRUE, - i * SIZEOF_VOID_P);
7643                 res = g_slist_prepend (res, info);
7644
7645                 get_delegate_virtual_invoke_impl (&info, FALSE, i * SIZEOF_VOID_P);
7646                 res = g_slist_prepend (res, info);
7647         }
7648
7649         return res;
7650 }
7651
7652 gpointer
7653 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
7654 {
7655         guint8 *code, *start;
7656         int i;
7657
7658         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
7659                 return NULL;
7660
7661         /* FIXME: Support more cases */
7662         if (MONO_TYPE_ISSTRUCT (mini_get_underlying_type (sig->ret)))
7663                 return NULL;
7664
7665         if (has_target) {
7666                 static guint8* cached = NULL;
7667
7668                 if (cached)
7669                         return cached;
7670
7671                 if (mono_aot_only) {
7672                         start = (guint8 *)mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
7673                 } else {
7674                         MonoTrampInfo *info;
7675                         start = (guint8 *)get_delegate_invoke_impl (&info, TRUE, 0);
7676                         mono_tramp_info_register (info, NULL);
7677                 }
7678
7679                 mono_memory_barrier ();
7680
7681                 cached = start;
7682         } else {
7683                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
7684                 for (i = 0; i < sig->param_count; ++i)
7685                         if (!mono_is_regsize_var (sig->params [i]))
7686                                 return NULL;
7687                 if (sig->param_count > 4)
7688                         return NULL;
7689
7690                 code = cache [sig->param_count];
7691                 if (code)
7692                         return code;
7693
7694                 if (mono_aot_only) {
7695                         char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
7696                         start = (guint8 *)mono_aot_get_trampoline (name);
7697                         g_free (name);
7698                 } else {
7699                         MonoTrampInfo *info;
7700                         start = (guint8 *)get_delegate_invoke_impl (&info, FALSE, sig->param_count);
7701                         mono_tramp_info_register (info, NULL);
7702                 }
7703
7704                 mono_memory_barrier ();
7705
7706                 cache [sig->param_count] = start;
7707         }
7708
7709         return start;
7710 }
7711
7712 gpointer
7713 mono_arch_get_delegate_virtual_invoke_impl (MonoMethodSignature *sig, MonoMethod *method, int offset, gboolean load_imt_reg)
7714 {
7715         MonoTrampInfo *info;
7716         gpointer code;
7717
7718         code = get_delegate_virtual_invoke_impl (&info, load_imt_reg, offset);
7719         if (code)
7720                 mono_tramp_info_register (info, NULL);
7721         return code;
7722 }
7723
7724 void
7725 mono_arch_finish_init (void)
7726 {
7727 #if !defined(HOST_WIN32) && defined(MONO_XEN_OPT)
7728         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
7729 #endif
7730 }
7731
7732 void
7733 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
7734 {
7735 }
7736
7737 #define CMP_SIZE (6 + 1)
7738 #define CMP_REG_REG_SIZE (4 + 1)
7739 #define BR_SMALL_SIZE 2
7740 #define BR_LARGE_SIZE 6
7741 #define MOV_REG_IMM_SIZE 10
7742 #define MOV_REG_IMM_32BIT_SIZE 6
7743 #define JUMP_REG_SIZE (2 + 1)
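/*
 * The constants above are conservative upper bounds on the encoded length of
 * the instruction patterns emitted by the IMT thunk builder below; the "+ 1"
 * terms leave room for a REX prefix. They only feed the size reservation
 * (checked by g_assert (code - start <= size) at the end), so overestimating
 * is harmless while underestimating would overflow the buffer.
 */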
7744
7745 static int
7746 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
7747 {
7748         int i, distance = 0;
7749         for (i = start; i < target; ++i)
7750                 distance += imt_entries [i]->chunk_size;
7751         return distance;
7752 }
7753
7754 /*
7755  * LOCKING: called with the domain lock held
7756  */
7757 gpointer
7758 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
7759         gpointer fail_tramp)
7760 {
7761         int i;
7762         int size = 0;
7763         guint8 *code, *start;
7764         gboolean vtable_is_32bit = ((gsize)(vtable) == (gsize)(int)(gsize)(vtable));
7765         GSList *unwind_ops;
7766
7767         for (i = 0; i < count; ++i) {
7768                 MonoIMTCheckItem *item = imt_entries [i];
7769                 if (item->is_equals) {
7770                         if (item->check_target_idx) {
7771                                 if (!item->compare_done) {
7772                                         if (amd64_use_imm32 ((gint64)item->key))
7773                                                 item->chunk_size += CMP_SIZE;
7774                                         else
7775                                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
7776                                 }
7777                                 if (item->has_target_code) {
7778                                         item->chunk_size += MOV_REG_IMM_SIZE;
7779                                 } else {
7780                                         if (vtable_is_32bit)
7781                                                 item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
7782                                         else
7783                                                 item->chunk_size += MOV_REG_IMM_SIZE;
7784                                 }
7785                                 item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
7786                         } else {
7787                                 if (fail_tramp) {
7788                                         item->chunk_size += MOV_REG_IMM_SIZE * 3 + CMP_REG_REG_SIZE +
7789                                                 BR_SMALL_SIZE + JUMP_REG_SIZE * 2;
7790                                 } else {
7791                                         if (vtable_is_32bit)
7792                                                 item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
7793                                         else
7794                                                 item->chunk_size += MOV_REG_IMM_SIZE;
7795                                         item->chunk_size += JUMP_REG_SIZE;
7796                                         /* with assert below:
7797                                          * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
7798                                          */
7799                                 }
7800                         }
7801                 } else {
7802                         if (amd64_use_imm32 ((gint64)item->key))
7803                                 item->chunk_size += CMP_SIZE;
7804                         else
7805                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
7806                         item->chunk_size += BR_LARGE_SIZE;
7807                         imt_entries [item->check_target_idx]->compare_done = TRUE;
7808                 }
7809                 size += item->chunk_size;
7810         }
7811         if (fail_tramp)
7812                 code = (guint8 *)mono_method_alloc_generic_virtual_thunk (domain, size);
7813         else
7814                 code = (guint8 *)mono_domain_code_reserve (domain, size);
7815         start = code;
7816
7817         unwind_ops = mono_arch_get_cie_program ();
7818
7819         for (i = 0; i < count; ++i) {
7820                 MonoIMTCheckItem *item = imt_entries [i];
7821                 item->code_target = code;
7822                 if (item->is_equals) {
7823                         gboolean fail_case = !item->check_target_idx && fail_tramp;
7824
7825                         if (item->check_target_idx || fail_case) {
7826                                 if (!item->compare_done || fail_case) {
7827                                         if (amd64_use_imm32 ((gint64)item->key))
7828                                                 amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof(gpointer));
7829                                         else {
7830                                                 amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_SCRATCH_REG, item->key, sizeof(gpointer));
7831                                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
7832                                         }
7833                                 }
7834                                 item->jmp_code = code;
7835                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
7836                                 if (item->has_target_code) {
7837                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->value.target_code);
7838                                         amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG);
7839                                 } else {
7840                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
7841                                         amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
7842                                 }
7843
7844                                 if (fail_case) {
7845                                         amd64_patch (item->jmp_code, code);
7846                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, fail_tramp);
7847                                         amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG);
7848                                         item->jmp_code = NULL;
7849                                 }
7850                         } else {
7851                                 /* enable the commented code to assert on wrong method */
7852 #if 0
7853                                 if (amd64_is_imm32 (item->key))
7854                                         amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof(gpointer));
7855                                 else {
7856                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->key);
7857                                         amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
7858                                 }
7859                                 item->jmp_code = code;
7860                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
7861                                 /* See the comment below about R10 */
7862                                 amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
7863                                 amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
7864                                 amd64_patch (item->jmp_code, code);
7865                                 amd64_breakpoint (code);
7866                                 item->jmp_code = NULL;
7867 #else
7868                                 /* We're using R10 (MONO_ARCH_IMT_SCRATCH_REG) here because
7869                                    R11 (MONO_ARCH_IMT_REG) needs to be preserved. R10 needs to
7870                                    be preserved for calls which require a runtime generic
7871                                    context, but interface calls don't. */
7873                                 amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
7874                                 amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
7875 #endif
7876                         }
7877                 } else {
7878                         if (amd64_use_imm32 ((gint64)item->key))
7879                                 amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof (gpointer));
7880                         else {
7881                                 amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_SCRATCH_REG, item->key, sizeof (gpointer));
7882                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
7883                         }
7884                         item->jmp_code = code;
7885                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
7886                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
7887                         else
7888                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
7889                 }
7890                 g_assert (code - item->code_target <= item->chunk_size);
7891         }
7892         /* patch the branches to get to the target items */
7893         for (i = 0; i < count; ++i) {
7894                 MonoIMTCheckItem *item = imt_entries [i];
7895                 if (item->jmp_code) {
7896                         if (item->check_target_idx) {
7897                                 amd64_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
7898                         }
7899                 }
7900         }
7901
7902         if (!fail_tramp)
7903                 mono_stats.imt_thunks_size += code - start;
7904         g_assert (code - start <= size);
7905
7906         mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL);
7907
7908         mono_tramp_info_register (mono_tramp_info_create (NULL, start, code - start, NULL, unwind_ops), domain);
7909
7910         return start;
7911 }
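/*
 * Sketch of one generated chunk for an is_equals entry (key and slot values
 * illustrative):
 *
 *     cmp r11, <key>           ; MONO_ARCH_IMT_REG holds the interface method
 *     jne <next entry>
 *     mov r10, <&vtable slot>  ; MONO_ARCH_IMT_SCRATCH_REG
 *     jmp [r10]
 *
 * Range entries emit only the compare plus a jge into the subtree that
 * handles the upper half of the key space.
 */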
7912
7913 MonoMethod*
7914 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
7915 {
7916         return (MonoMethod*)regs [MONO_ARCH_IMT_REG];
7917 }
7918
7919 MonoVTable*
7920 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
7921 {
7922         return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
7923 }
7924
7925 GSList*
7926 mono_arch_get_cie_program (void)
7927 {
7928         GSList *l = NULL;
7929
7930         mono_add_unwind_op_def_cfa (l, (guint8*)NULL, (guint8*)NULL, AMD64_RSP, 8);
7931         mono_add_unwind_op_offset (l, (guint8*)NULL, (guint8*)NULL, AMD64_RIP, -8);
7932
7933         return l;
7934 }
7935
7936 #ifndef DISABLE_JIT
7937
7938 MonoInst*
7939 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
7940 {
7941         MonoInst *ins = NULL;
7942         int opcode = 0;
7943
7944         if (cmethod->klass == mono_defaults.math_class) {
7945                 if (strcmp (cmethod->name, "Sin") == 0) {
7946                         opcode = OP_SIN;
7947                 } else if (strcmp (cmethod->name, "Cos") == 0) {
7948                         opcode = OP_COS;
7949                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
7950                         opcode = OP_SQRT;
7951                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
7952                         opcode = OP_ABS;
7953                 }
7954                 
7955                 if (opcode && fsig->param_count == 1) {
7956                         MONO_INST_NEW (cfg, ins, opcode);
7957                         ins->type = STACK_R8;
7958                         ins->dreg = mono_alloc_freg (cfg);
7959                         ins->sreg1 = args [0]->dreg;
7960                         MONO_ADD_INS (cfg->cbb, ins);
7961                 }
7962
7963                 opcode = 0;
7964                 if (cfg->opt & MONO_OPT_CMOV) {
7965                         if (strcmp (cmethod->name, "Min") == 0) {
7966                                 if (fsig->params [0]->type == MONO_TYPE_I4)
7967                                         opcode = OP_IMIN;
7968                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
7969                                         opcode = OP_IMIN_UN;
7970                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
7971                                         opcode = OP_LMIN;
7972                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
7973                                         opcode = OP_LMIN_UN;
7974                         } else if (strcmp (cmethod->name, "Max") == 0) {
7975                                 if (fsig->params [0]->type == MONO_TYPE_I4)
7976                                         opcode = OP_IMAX;
7977                                 if (fsig->params [0]->type == MONO_TYPE_U4)
7978                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
7979                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
7980                                         opcode = OP_LMAX;
7981                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
7982                                         opcode = OP_LMAX_UN;
7983                         }
7984                 }
7985                 
7986                 if (opcode && fsig->param_count == 2) {
7987                         MONO_INST_NEW (cfg, ins, opcode);
7988                         ins->type = fsig->params [0]->type == MONO_TYPE_I4 ? STACK_I4 : STACK_I8;
7989                         ins->dreg = mono_alloc_ireg (cfg);
7990                         ins->sreg1 = args [0]->dreg;
7991                         ins->sreg2 = args [1]->dreg;
7992                         MONO_ADD_INS (cfg->cbb, ins);
7993                 }
7994
7995 #if 0
7996                 /* OP_FREM is not IEEE compatible */
7997                 else if (strcmp (cmethod->name, "IEEERemainder") == 0 && fsig->param_count == 2) {
7998                         MONO_INST_NEW (cfg, ins, OP_FREM);
7999                         ins->inst_i0 = args [0];
8000                         ins->inst_i1 = args [1];
8001                 }
8002 #endif
8003         }
8004
8005         return ins;
8006 }
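/*
 * Example of the mapping above (vreg numbers illustrative): a call to
 * System.Math.Sqrt (x) is replaced inline by
 *
 *     freg R9 <- OP_SQRT freg R8    ; R8 is args [0]->dreg
 *
 * so no managed call is emitted for these methods at all.
 */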
8007 #endif
8008
8009 gboolean
8010 mono_arch_print_tree (MonoInst *tree, int arity)
8011 {
8012         return 0;
8013 }
8014
8015 mgreg_t
8016 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
8017 {
8018         return ctx->gregs [reg];
8019 }
8020
8021 void
8022 mono_arch_context_set_int_reg (MonoContext *ctx, int reg, mgreg_t val)
8023 {
8024         ctx->gregs [reg] = val;
8025 }
8026
8027 gpointer
8028 mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
8029 {
8030         gpointer *sp, old_value;
8031         char *bp;
8032
8033         /*Load the spvar*/
8034         bp = (char *)MONO_CONTEXT_GET_BP (ctx);
8035         sp = (gpointer *)*(gpointer*)(bp + clause->exvar_offset);
8036
8037         old_value = *sp;
8038         if (old_value < ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
8039                 return old_value;
8040
8041         *sp = new_value;
8042
8043         return old_value;
8044 }
8045
8046 /*
8047  * mono_arch_emit_load_aotconst:
8048  *
8049  *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
8050  * TARGET from the mscorlib GOT in full-aot code.
8051  * On AMD64, the result is placed into R11.
8052  */
8053 guint8*
8054 mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, MonoJumpInfoType tramp_type, gconstpointer target)
8055 {
8056         *ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
8057         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
8058
8059         return code;
8060 }
8061
8062 /*
8063  * mono_arch_get_trampolines:
8064  *
8065  *   Return a list of MonoTrampInfo structures describing arch specific trampolines
8066  * for AOT.
8067  */
8068 GSList *
8069 mono_arch_get_trampolines (gboolean aot)
8070 {
8071         return mono_amd64_get_exception_trampolines (aot);
8072 }
8073
8074 /* Soft Debug support */
8075 #ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
8076
8077 /*
8078  * mono_arch_set_breakpoint:
8079  *
8080  *   Set a breakpoint at the native code corresponding to JI at NATIVE_OFFSET.
8081  * The location should contain code emitted by OP_SEQ_POINT.
8082  */
8083 void
8084 mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
8085 {
8086         guint8 *code = ip;
8087
8088         if (ji->from_aot) {
8089                 guint32 native_offset = ip - (guint8*)ji->code_start;
8090                 SeqPointInfo *info = (SeqPointInfo *)mono_arch_get_seq_point_info (mono_domain_get (), (guint8 *)ji->code_start);
8091
8092                 g_assert (info->bp_addrs [native_offset] == 0);
8093                 info->bp_addrs [native_offset] = mini_get_breakpoint_trampoline ();
8094         } else {
8095                 /* ip points to a mov r11, 0 */
8096                 g_assert (code [0] == 0x41);
8097                 g_assert (code [1] == 0xbb);
8098                 amd64_mov_reg_imm (code, AMD64_R11, 1);
8099         }
8100 }
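/*
 * In the non-AOT case the sequence point site contains "mov r11d, imm32", so
 * toggling a breakpoint only rewrites the immediate (encoding per the asserts
 * above):
 *
 *     41 bb 00 00 00 00    ; breakpoint clear
 *     41 bb 01 00 00 00    ; breakpoint set
 */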
8101
8102 /*
8103  * mono_arch_clear_breakpoint:
8104  *
8105  *   Clear the breakpoint at IP.
8106  */
8107 void
8108 mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
8109 {
8110         guint8 *code = ip;
8111
8112         if (ji->from_aot) {
8113                 guint32 native_offset = ip - (guint8*)ji->code_start;
8114                 SeqPointInfo *info = (SeqPointInfo *)mono_arch_get_seq_point_info (mono_domain_get (), (guint8 *)ji->code_start);
8115
8116                 info->bp_addrs [native_offset] = NULL;
8117         } else {
8118                 amd64_mov_reg_imm (code, AMD64_R11, 0);
8119         }
8120 }
8121
8122 gboolean
8123 mono_arch_is_breakpoint_event (void *info, void *sigctx)
8124 {
8125         /* We use soft breakpoints on amd64 */
8126         return FALSE;
8127 }
8128
8129 /*
8130  * mono_arch_skip_breakpoint:
8131  *
8132  *   Modify CTX so the ip is placed after the breakpoint instruction, so when
8133  * we resume, the instruction is not executed again.
8134  */
8135 void
8136 mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
8137 {
8138         g_assert_not_reached ();
8139 }
8140         
8141 /*
8142  * mono_arch_start_single_stepping:
8143  *
8144  *   Start single stepping.
8145  */
8146 void
8147 mono_arch_start_single_stepping (void)
8148 {
8149         ss_trampoline = mini_get_single_step_trampoline ();
8150 }
8151         
8152 /*
8153  * mono_arch_stop_single_stepping:
8154  *
8155  *   Stop single stepping.
8156  */
8157 void
8158 mono_arch_stop_single_stepping (void)
8159 {
8160         ss_trampoline = NULL;
8161 }
8162
8163 /*
8164  * mono_arch_is_single_step_event:
8165  *
8166  * Return whether the machine state in SIGCTX corresponds to a single
8167  * step event.
8168  */
8169 gboolean
8170 mono_arch_is_single_step_event (void *info, void *sigctx)
8171 {
8172         /* We use soft breakpoints on amd64 */
8173         return FALSE;
8174 }
8175
8176 /*
8177  * mono_arch_skip_single_step:
8178  *
8179  *   Modify CTX so the ip is placed after the single step trigger instruction,
8180  * so when we resume, the instruction is not executed again.
8181  */
8182 void
8183 mono_arch_skip_single_step (MonoContext *ctx)
8184 {
8185         g_assert_not_reached ();
8186 }
8187
8188 /*
8189  * mono_arch_get_seq_point_info:
8190  *
8191  *   Return a pointer to a data structure which is used by the sequence
8192  * point implementation in AOTed code.
8193  */
8194 gpointer
8195 mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
8196 {
8197         SeqPointInfo *info;
8198         MonoJitInfo *ji;
8199
8200         // FIXME: Add a free function
8201
8202         mono_domain_lock (domain);
8203         info = (SeqPointInfo *)g_hash_table_lookup (domain_jit_info (domain)->arch_seq_points,
8204                                                                 code);
8205         mono_domain_unlock (domain);
8206
8207         if (!info) {
8208                 ji = mono_jit_info_table_find (domain, (char*)code);
8209                 g_assert (ji);
8210
8211                 // FIXME: Optimize the size
8212                 info = (SeqPointInfo *)g_malloc0 (sizeof (SeqPointInfo) + (ji->code_size * sizeof (gpointer)));
8213
8214                 info->ss_tramp_addr = &ss_trampoline;
8215
8216                 mono_domain_lock (domain);
8217                 g_hash_table_insert (domain_jit_info (domain)->arch_seq_points,
8218                                                          code, info);
8219                 mono_domain_unlock (domain);
8220         }
8221
8222         return info;
8223 }
8224
8225 void
8226 mono_arch_init_lmf_ext (MonoLMFExt *ext, gpointer prev_lmf)
8227 {
8228         ext->lmf.previous_lmf = prev_lmf;
8229         /* Mark that this is a MonoLMFExt */
8230         ext->lmf.previous_lmf = (gpointer)(((gssize)ext->lmf.previous_lmf) | 2);
8231         ext->lmf.rsp = (gssize)ext;
8232 }
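/*
 * Minimal sketch of the tagging scheme above: the low bits of previous_lmf
 * act as discriminators, so unwind code can recover both the flag and the
 * real pointer, e.g.
 *
 *     gboolean is_ext = ((gssize)lmf->previous_lmf) & 2;
 *     MonoLMF *prev = (MonoLMF*)(((gssize)lmf->previous_lmf) & ~3);
 */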
8233
8234 #endif
8235
8236 gboolean
8237 mono_arch_opcode_supported (int opcode)
8238 {
8239         switch (opcode) {
8240         case OP_ATOMIC_ADD_I4:
8241         case OP_ATOMIC_ADD_I8:
8242         case OP_ATOMIC_EXCHANGE_I4:
8243         case OP_ATOMIC_EXCHANGE_I8:
8244         case OP_ATOMIC_CAS_I4:
8245         case OP_ATOMIC_CAS_I8:
8246         case OP_ATOMIC_LOAD_I1:
8247         case OP_ATOMIC_LOAD_I2:
8248         case OP_ATOMIC_LOAD_I4:
8249         case OP_ATOMIC_LOAD_I8:
8250         case OP_ATOMIC_LOAD_U1:
8251         case OP_ATOMIC_LOAD_U2:
8252         case OP_ATOMIC_LOAD_U4:
8253         case OP_ATOMIC_LOAD_U8:
8254         case OP_ATOMIC_LOAD_R4:
8255         case OP_ATOMIC_LOAD_R8:
8256         case OP_ATOMIC_STORE_I1:
8257         case OP_ATOMIC_STORE_I2:
8258         case OP_ATOMIC_STORE_I4:
8259         case OP_ATOMIC_STORE_I8:
8260         case OP_ATOMIC_STORE_U1:
8261         case OP_ATOMIC_STORE_U2:
8262         case OP_ATOMIC_STORE_U4:
8263         case OP_ATOMIC_STORE_U8:
8264         case OP_ATOMIC_STORE_R4:
8265         case OP_ATOMIC_STORE_R8:
8266                 return TRUE;
8267         default:
8268                 return FALSE;
8269         }
8270 }
8271
8272 CallInfo*
8273 mono_arch_get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
8274 {
8275         return get_call_info (mp, sig);
8276 }