/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Based on mini-x86.c.
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *   Zoltan Varga (vargaz@gmail.com)
 *
 * (C) 2003 Ximian, Inc.
 * Copyright 2003-2011 Novell, Inc (http://www.novell.com)
 * Copyright 2011 Xamarin, Inc (http://www.xamarin.com)
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include <mono/metadata/abi-details.h>
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/metadata/gc-internal.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>
#include <mono/utils/mono-tls.h>
#include <mono/utils/mono-hwcap-x86.h>
#include <mono/utils/mono-threads.h>

#include "trace.h"
#include "ir-emit.h"
#include "mini-amd64.h"
#include "cpu-amd64.h"
#include "debugger-agent.h"
#include "mini-gc.h"

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
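/*
 * Illustrative note (not from the original source): ALIGN_TO rounds a value
 * up to a power-of-two boundary and IS_IMM32 tests whether a value fits in
 * 32 bits when zero extended; e.g. ALIGN_TO (13, 8) == 16,
 * IS_IMM32 (0x7fffffff) is TRUE and IS_IMM32 (0x100000000ULL) is FALSE.
 */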

#ifdef TARGET_WIN32
/* Under Windows, the calling convention is never stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (FALSE)
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() mono_mutex_lock (&mini_arch_mutex)
#define mono_mini_arch_unlock() mono_mutex_unlock (&mini_arch_mutex)
static mono_mutex_t mini_arch_mutex;

/*
 * The code generated for sequence points reads from this location, which is
 * made read-only when single stepping is enabled.
 */
static gpointer ss_trigger_page;

/* Enabled breakpoints read from this trigger page */
static gpointer bp_trigger_page;

/* The size of the breakpoint sequence */
static int breakpoint_size;

/* The size of the breakpoint instruction causing the actual fault */
static int breakpoint_fault_size;

/* The size of the single step instruction causing the actual fault */
static int single_step_fault_size;

/* The single step trampoline */
static gpointer ss_trampoline;

/* Offset between fp and the first argument in the callee */
#define ARGS_OFFSET 16
#define GP_SCRATCH_REG AMD64_R11

/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 *                  ZF PF CF
 * A > B            0  0  0
 * A < B            0  0  1
 * A = B            1  0  0
 * UNORDERED        1  1  1
 */
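/*
 * Editorial note (not from the original source): these are the flag patterns
 * produced by comisd/ucomisd; a NaN operand yields UNORDERED (ZF=PF=CF=1),
 * so generated code can test PF to detect unordered comparisons.
 */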

const char*
mono_arch_regname (int reg)
{
        switch (reg) {
        case AMD64_RAX: return "%rax";
        case AMD64_RBX: return "%rbx";
        case AMD64_RCX: return "%rcx";
        case AMD64_RDX: return "%rdx";
        case AMD64_RSP: return "%rsp";
        case AMD64_RBP: return "%rbp";
        case AMD64_RDI: return "%rdi";
        case AMD64_RSI: return "%rsi";
        case AMD64_R8: return "%r8";
        case AMD64_R9: return "%r9";
        case AMD64_R10: return "%r10";
        case AMD64_R11: return "%r11";
        case AMD64_R12: return "%r12";
        case AMD64_R13: return "%r13";
        case AMD64_R14: return "%r14";
        case AMD64_R15: return "%r15";
        }
        return "unknown";
}

static const char * packed_xmmregs [] = {
        "p:xmm0", "p:xmm1", "p:xmm2", "p:xmm3", "p:xmm4", "p:xmm5", "p:xmm6", "p:xmm7", "p:xmm8",
        "p:xmm9", "p:xmm10", "p:xmm11", "p:xmm12", "p:xmm13", "p:xmm14", "p:xmm15"
};

static const char * single_xmmregs [] = {
        "s:xmm0", "s:xmm1", "s:xmm2", "s:xmm3", "s:xmm4", "s:xmm5", "s:xmm6", "s:xmm7", "s:xmm8",
        "s:xmm9", "s:xmm10", "s:xmm11", "s:xmm12", "s:xmm13", "s:xmm14", "s:xmm15"
};

const char*
mono_arch_fregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return single_xmmregs [reg];
        else
                return "unknown";
}

const char *
mono_arch_xregname (int reg)
{
        if (reg < AMD64_XMM_NREG)
                return packed_xmmregs [reg];
        else
                return "unknown";
}

static gboolean
debug_omit_fp (void)
{
#if 0
        return mono_debug_count ();
#else
        return TRUE;
#endif
}

static inline gboolean
amd64_is_near_call (guint8 *code)
{
        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f))
                code += 1;

        return code [0] == 0xe8;
}
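/*
 * Editorial note (not from the original source): 0xe8 is the near "call
 * rel32" opcode, so e.g. the byte sequence e8 00 00 00 00 is a call whose
 * target is the instruction immediately following it; an optional REX
 * prefix (0x40-0x4f) may precede the opcode and is skipped above.
 */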

static inline gboolean
amd64_use_imm32 (gint64 val)
{
        if (mini_get_debug_options ()->single_imm_size)
                return FALSE;

        return amd64_is_imm32 (val);
}

#ifdef __native_client_codegen__

/* Keep track of instruction "depth", that is, the level of sub-instruction */
/* for any given instruction.  For instance, amd64_call_reg resolves to     */
/* amd64_call_reg_internal, which uses amd64_alu_* macros, etc.             */
/* We only want to force bundle alignment for the top level instruction,    */
/* so NaCl pseudo-instructions can be implemented with sub instructions.    */
static MonoNativeTlsKey nacl_instruction_depth;

static MonoNativeTlsKey nacl_rex_tag;
static MonoNativeTlsKey nacl_legacy_prefix_tag;

void
amd64_nacl_clear_legacy_prefix_tag ()
{
        mono_native_tls_set_value (nacl_legacy_prefix_tag, NULL);
}

void
amd64_nacl_tag_legacy_prefix (guint8* code)
{
        if (mono_native_tls_get_value (nacl_legacy_prefix_tag) == NULL)
                mono_native_tls_set_value (nacl_legacy_prefix_tag, code);
}

void
amd64_nacl_tag_rex (guint8* code)
{
        mono_native_tls_set_value (nacl_rex_tag, code);
}

guint8*
amd64_nacl_get_legacy_prefix_tag ()
{
        return (guint8*)mono_native_tls_get_value (nacl_legacy_prefix_tag);
}

guint8*
amd64_nacl_get_rex_tag ()
{
        return (guint8*)mono_native_tls_get_value (nacl_rex_tag);
}

/* Increment the instruction "depth" described above */
void
amd64_nacl_instruction_pre ()
{
        intptr_t depth = (intptr_t) mono_native_tls_get_value (nacl_instruction_depth);
        depth++;
        mono_native_tls_set_value (nacl_instruction_depth, (gpointer)depth);
}

/* amd64_nacl_instruction_post: Decrement instruction "depth", force bundle */
/* alignment if depth == 0 (top level instruction)                          */
/* IN: start, end    pointers to instruction beginning and end              */
/* OUT: start, end   pointers to beginning and end after possible alignment */
/* GLOBALS: nacl_instruction_depth     defined above                        */
void
amd64_nacl_instruction_post (guint8 **start, guint8 **end)
{
        intptr_t depth = (intptr_t) mono_native_tls_get_value (nacl_instruction_depth);
        depth--;
        mono_native_tls_set_value (nacl_instruction_depth, (void*)depth);

        g_assert (depth >= 0);
        if (depth == 0) {
                uintptr_t space_in_block;
                uintptr_t instlen;
                guint8 *prefix = amd64_nacl_get_legacy_prefix_tag ();
                /* if legacy prefix is present, and if it was emitted before */
                /* the start of the instruction sequence, adjust the start   */
                if (prefix != NULL && prefix < *start) {
                        g_assert (*start - prefix <= 3); /* only 3 are allowed */
                        *start = prefix;
                }
                space_in_block = kNaClAlignment - ((uintptr_t)(*start) & kNaClAlignmentMask);
                instlen = (uintptr_t)(*end - *start);
                /* Only check for instructions which are less than        */
                /* kNaClAlignment. The only instructions that should ever */
                /* be that long are call sequences, which are already     */
                /* padded out to align the return to the next bundle.     */
                if (instlen > space_in_block && instlen < kNaClAlignment) {
                        const size_t MAX_NACL_INST_LENGTH = kNaClAlignment;
                        guint8 copy_of_instruction [MAX_NACL_INST_LENGTH];
                        const size_t length = (size_t)((*end) - (*start));
                        g_assert (length < MAX_NACL_INST_LENGTH);

                        memcpy (copy_of_instruction, *start, length);
                        *start = mono_arch_nacl_pad (*start, space_in_block);
                        memcpy (*start, copy_of_instruction, length);
                        *end = *start + length;
                }
                amd64_nacl_clear_legacy_prefix_tag ();
                amd64_nacl_tag_rex (NULL);
        }
}

/* amd64_nacl_membase_handler: ensure all access to memory of the form      */
/*   OFFSET(%rXX) is sandboxed.  For allowable base registers %rip, %rbp,   */
/*   %rsp, and %r15, emit the membase as usual.  For all other registers,   */
/*   make sure the upper 32-bits are cleared, and use that register in the  */
/*   index field of a new address of this form: OFFSET(%r15,%eXX,1)         */
/* IN:      code                                                            */
/*             pointer to current instruction stream (in the                */
/*             middle of an instruction, after opcode is emitted)           */
/*          basereg/offset/dreg                                             */
/*             operands of normal membase address                           */
/* OUT:     code                                                            */
/*             pointer to the end of the membase/memindex emit              */
/* GLOBALS: nacl_rex_tag                                                    */
/*             position in instruction stream that rex prefix was emitted   */
/*          nacl_legacy_prefix_tag                                          */
/*             (possibly NULL) position in instruction of legacy x86 prefix */
void
amd64_nacl_membase_handler (guint8** code, gint8 basereg, gint32 offset, gint8 dreg)
{
        gint8 true_basereg = basereg;

        /* Cache these values, they might change  */
        /* as new instructions are emitted below. */
        guint8* rex_tag = amd64_nacl_get_rex_tag ();
        guint8* legacy_prefix_tag = amd64_nacl_get_legacy_prefix_tag ();

        /* 'basereg' is given masked to 0x7 at this point, so check */
        /* the rex prefix to see if this is an extended register.   */
        if ((rex_tag != NULL) && IS_REX (*rex_tag) && (*rex_tag & AMD64_REX_B)) {
                true_basereg |= 0x8;
        }

#define X86_LEA_OPCODE (0x8D)

        if (!amd64_is_valid_nacl_base (true_basereg) && (*(*code - 1) != X86_LEA_OPCODE)) {
                guint8* old_instruction_start;

                /* This will hold the 'mov %eXX, %eXX' that clears the upper */
                /* 32-bits of the old base register (new index register)     */
                guint8 buf [32];
                guint8* buf_ptr = buf;
                size_t insert_len;

                g_assert (rex_tag != NULL);

                if (IS_REX (*rex_tag)) {
                        /* The old rex.B should be the new rex.X */
                        if (*rex_tag & AMD64_REX_B) {
                                *rex_tag |= AMD64_REX_X;
                        }
                        /* Since our new base is %r15 set rex.B */
                        *rex_tag |= AMD64_REX_B;
                } else {
                        /* Shift the instruction by one byte  */
                        /* so we can insert a rex prefix      */
                        memmove (rex_tag + 1, rex_tag, (size_t)(*code - rex_tag));
                        *code += 1;
                        /* New rex prefix only needs rex.B for %r15 base */
                        *rex_tag = AMD64_REX (AMD64_REX_B);
                }

                if (legacy_prefix_tag) {
                        old_instruction_start = legacy_prefix_tag;
                } else {
                        old_instruction_start = rex_tag;
                }

                /* Clears the upper 32-bits of the previous base register */
                amd64_mov_reg_reg_size (buf_ptr, true_basereg, true_basereg, 4);
                insert_len = buf_ptr - buf;

                /* Move the old instruction forward to make */
                /* room for 'mov' stored in 'buf_ptr'       */
                memmove (old_instruction_start + insert_len, old_instruction_start, (size_t)(*code - old_instruction_start));
                *code += insert_len;
                memcpy (old_instruction_start, buf, insert_len);

                /* Sandboxed replacement for the normal membase_emit */
                x86_memindex_emit (*code, dreg, AMD64_R15, offset, basereg, 0);

        } else {
                /* Normal default behavior, emit membase memory location */
                x86_membase_emit_body (*code, dreg, basereg, offset);
        }
}


static inline unsigned char*
amd64_skip_nops (unsigned char* code)
{
        guint8 in_nop;
        do {
                in_nop = 0;
                if (code [0] == 0x90) {
                        in_nop = 1;
                        code += 1;
                }
                if (code [0] == 0x66 && code [1] == 0x90) {
                        in_nop = 1;
                        code += 2;
                }
                if (code [0] == 0x0f && code [1] == 0x1f
                 && code [2] == 0x00) {
                        in_nop = 1;
                        code += 3;
                }
                if (code [0] == 0x0f && code [1] == 0x1f
                 && code [2] == 0x40 && code [3] == 0x00) {
                        in_nop = 1;
                        code += 4;
                }
                if (code [0] == 0x0f && code [1] == 0x1f
                 && code [2] == 0x44 && code [3] == 0x00
                 && code [4] == 0x00) {
                        in_nop = 1;
                        code += 5;
                }
                if (code [0] == 0x66 && code [1] == 0x0f
                 && code [2] == 0x1f && code [3] == 0x44
                 && code [4] == 0x00 && code [5] == 0x00) {
                        in_nop = 1;
                        code += 6;
                }
                if (code [0] == 0x0f && code [1] == 0x1f
                 && code [2] == 0x80 && code [3] == 0x00
                 && code [4] == 0x00 && code [5] == 0x00
                 && code [6] == 0x00) {
                        in_nop = 1;
                        code += 7;
                }
                if (code [0] == 0x0f && code [1] == 0x1f
                 && code [2] == 0x84 && code [3] == 0x00
                 && code [4] == 0x00 && code [5] == 0x00
                 && code [6] == 0x00 && code [7] == 0x00) {
                        in_nop = 1;
                        code += 8;
                }
        } while (in_nop);
        return code;
}
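/*
 * Editorial note (not from the original source): the patterns matched above
 * are the standard 1- to 8-byte multi-byte NOP encodings (0x90, 66 90 and
 * the 0F 1F /0 family) recommended by the AMD64/Intel optimization manuals.
 */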

guint8*
mono_arch_nacl_skip_nops (guint8* code)
{
        return amd64_skip_nops (code);
}

#endif /*__native_client_codegen__*/

static inline void
amd64_patch (unsigned char* code, gpointer target)
{
        guint8 rex = 0;

#ifdef __native_client_codegen__
        code = amd64_skip_nops (code);
#endif
#if defined(__native_client_codegen__) && defined(__native_client__)
        if (nacl_is_code_address (code)) {
                /* For tail calls, code is patched after being installed */
                /* but not through the normal "patch callsite" method.   */
                unsigned char buf [kNaClAlignment];
                unsigned char *aligned_code = (unsigned char *)((uintptr_t)code & ~kNaClAlignmentMask);
                int ret;
                memcpy (buf, aligned_code, kNaClAlignment);
                /* Patch a temp buffer of bundle size, */
                /* then install to actual location.    */
                amd64_patch (buf + ((uintptr_t)code - (uintptr_t)aligned_code), target);
                ret = nacl_dyncode_modify (aligned_code, buf, kNaClAlignment);
                g_assert (ret == 0);
                return;
        }
        target = nacl_modify_patch_target (target);
#endif

        /* Skip REX */
        if ((code [0] >= 0x40) && (code [0] <= 0x4f)) {
                rex = code [0];
                code += 1;
        }

        if ((code [0] & 0xf8) == 0xb8) {
                /* amd64_set_reg_template */
                *(guint64*)(code + 1) = (guint64)target;
        }
        else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) {
                /* mov 0(%rip), %dreg */
                *(guint32*)(code + 2) = (guint32)(guint64)target - 7;
        }
        else if ((code [0] == 0xff) && (code [1] == 0x15)) {
                /* call *<OFFSET>(%rip) */
                *(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
        }
        else if (code [0] == 0xe8) {
                /* call <DISP> */
                gint64 disp = (guint8*)target - (guint8*)code;
                g_assert (amd64_is_imm32 (disp));
                x86_patch (code, (unsigned char*)target);
        }
        else
                x86_patch (code, (unsigned char*)target);
}
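/*
 * Illustrative sketch (not part of the original source): a typical patch
 * site. The emitter leaves a "mov $imm64, %reg" template behind (the
 * amd64_set_reg_template case above) and amd64_patch () later rewrites the
 * 64-bit immediate; "site" and "actual_target" are hypothetical names.
 */
#if 0
guint8 *site = code;
amd64_set_reg_template (code, AMD64_R11); /* emits REX + 0xb8+reg + imm64 placeholder */
/* ... later, once the destination is known ... */
amd64_patch (site, actual_target);
#endif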

void
mono_amd64_patch (unsigned char* code, gpointer target)
{
        amd64_patch (code, target);
}

typedef enum {
        ArgInIReg,
        ArgInFloatSSEReg,
        ArgInDoubleSSEReg,
        ArgOnStack,
        ArgValuetypeInReg,
        ArgValuetypeAddrInIReg,
        /* gsharedvt argument passed by addr */
        ArgGSharedVtInReg,
        ArgGSharedVtOnStack,
        ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
        gint16 offset;
        gint8  reg;
        ArgStorage storage;

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];
        gint8 pair_regs [2];
        /* The size of each pair */
        int pair_size [2];
        int nregs;
} ArgInfo;

typedef struct {
        int nargs;
        guint32 stack_usage;
        guint32 reg_usage;
        guint32 freg_usage;
        gboolean need_stack_align;
        /* The index of the vret arg in the argument list */
        int vret_arg_index;
        ArgInfo ret;
        ArgInfo sig_cookie;
        ArgInfo args [1];
} CallInfo;
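/*
 * Illustrative note (not from the original source): for a System V AMD64
 * signature such as int f (int a, double b), get_call_info () below fills
 * this in as: args [0] ArgInIReg in %rdi, args [1] ArgInDoubleSSEReg in
 * %xmm0, and ret ArgInIReg in %rax.
 */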

#define DEBUG(a) if (cfg->verbose_level > 1) a

#ifdef TARGET_WIN32
static AMD64_Reg_No param_regs [] = { AMD64_RCX, AMD64_RDX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#else
static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#endif

static inline void
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
        ainfo->offset = *stack_size;

        if (*gr >= PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                /* Since the same stack slot size is used for all arg */
                /*  types, it needs to be big enough to hold them all */
                (*stack_size) += sizeof (mgreg_t);
        }
        else {
                ainfo->storage = ArgInIReg;
                ainfo->reg = param_regs [*gr];
                (*gr) ++;
        }
}

#ifdef TARGET_WIN32
#define FLOAT_PARAM_REGS 4
#else
#define FLOAT_PARAM_REGS 8
#endif

static inline void
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
        ainfo->offset = *stack_size;

        if (*gr >= FLOAT_PARAM_REGS) {
                ainfo->storage = ArgOnStack;
                /* Since the same stack slot size is used for both float */
                /*  types, it needs to be big enough to hold them both */
                (*stack_size) += sizeof (mgreg_t);
        }
        else {
                /* A double register */
                if (is_double)
                        ainfo->storage = ArgInDoubleSSEReg;
                else
                        ainfo->storage = ArgInFloatSSEReg;
                ainfo->reg = *gr;
                (*gr) += 1;
        }
}

typedef enum ArgumentClass {
        ARG_CLASS_NO_CLASS,
        ARG_CLASS_MEMORY,
        ARG_CLASS_INTEGER,
        ARG_CLASS_SSE
} ArgumentClass;

static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
        ArgumentClass class2 = ARG_CLASS_NO_CLASS;
        MonoType *ptype;

        ptype = mini_get_underlying_type (type);
        switch (ptype->type) {
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_STRING:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_I8:
        case MONO_TYPE_U8:
                class2 = ARG_CLASS_INTEGER;
                break;
        case MONO_TYPE_R4:
        case MONO_TYPE_R8:
#ifdef TARGET_WIN32
                class2 = ARG_CLASS_INTEGER;
#else
                class2 = ARG_CLASS_SSE;
#endif
                break;

        case MONO_TYPE_TYPEDBYREF:
                g_assert_not_reached ();

        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (ptype)) {
                        class2 = ARG_CLASS_INTEGER;
                        break;
                }
                /* fall through */
        case MONO_TYPE_VALUETYPE: {
                MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
                int i;

                for (i = 0; i < info->num_fields; ++i) {
                        class2 = class1;
                        class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
                }
                break;
        }
        default:
                g_assert_not_reached ();
        }

        /* Merge */
        if (class1 == class2)
                ;
        else if (class1 == ARG_CLASS_NO_CLASS)
                class1 = class2;
        else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
                class1 = ARG_CLASS_MEMORY;
        else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
                class1 = ARG_CLASS_INTEGER;
        else
                class1 = ARG_CLASS_SSE;

        return class1;
}
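/*
 * Illustrative note (not from the original source): under the System V merge
 * rules above, a struct such as { int a; float b; } occupies one eightbyte
 * whose fields classify as INTEGER and SSE; since INTEGER wins the merge,
 * the whole quad is passed in a general purpose register.
 */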
#ifdef __native_client_codegen__

/* Default alignment for Native Client is 32-byte. */
gint8 nacl_align_byte = -32; /* signed version of 0xe0 */

/* mono_arch_nacl_pad: Add pad bytes of alignment instructions at code,  */
/* Check that alignment doesn't cross an alignment boundary.             */
guint8*
mono_arch_nacl_pad (guint8 *code, int pad)
{
        const int kMaxPadding = 8; /* see amd64-codegen.h:amd64_padding_size() */

        if (pad == 0) return code;
        /* assertion: alignment cannot cross a block boundary */
        g_assert (((uintptr_t)code & (~kNaClAlignmentMask)) ==
                 (((uintptr_t)code + pad - 1) & (~kNaClAlignmentMask)));
        while (pad >= kMaxPadding) {
                amd64_padding (code, kMaxPadding);
                pad -= kMaxPadding;
        }
        if (pad != 0) amd64_padding (code, pad);
        return code;
}
#endif

static int
count_fields_nested (MonoClass *klass)
{
        MonoMarshalType *info;
        int i, count;

        info = mono_marshal_load_type_info (klass);
        g_assert (info);
        count = 0;
        for (i = 0; i < info->num_fields; ++i) {
                if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type))
                        count += count_fields_nested (mono_class_from_mono_type (info->fields [i].field->type));
                else
                        count ++;
        }
        return count;
}
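/*
 * Illustrative note (not from the original source): for
 * struct Outer { struct Inner { int x; int y; } in; int z; },
 * count_fields_nested () returns 3, since the nested struct is flattened
 * into its leaf fields, matching collect_field_info_nested () below.
 */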

static int
collect_field_info_nested (MonoClass *klass, MonoMarshalField *fields, int index, int offset)
{
        MonoMarshalType *info;
        int i;

        info = mono_marshal_load_type_info (klass);
        g_assert (info);
        for (i = 0; i < info->num_fields; ++i) {
                if (MONO_TYPE_ISSTRUCT (info->fields [i].field->type)) {
                        index = collect_field_info_nested (mono_class_from_mono_type (info->fields [i].field->type), fields, index, info->fields [i].offset);
                } else {
                        memcpy (&fields [index], &info->fields [i], sizeof (MonoMarshalField));
                        fields [index].offset += offset;
                        index ++;
                }
        }
        return index;
}

static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
                           gboolean is_return,
                           guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size, quad, nquads, i, nfields;
        /* Keep track of the size used in each quad so we can */
        /* use the right size when copying args/return vars.  */
        guint32 quadsize [2] = {8, 8};
        ArgumentClass args [2];
        MonoMarshalType *info = NULL;
        MonoMarshalField *fields = NULL;
        MonoClass *klass;
        gboolean pass_on_stack = FALSE;

        klass = mono_class_from_mono_type (type);
        size = mini_type_stack_size_full (&klass->byval_arg, NULL, sig->pinvoke);
#ifndef TARGET_WIN32
        if (!sig->pinvoke && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
                /* We pass and return vtypes of size 8 in a register */
        } else if (!sig->pinvoke || (size == 0) || (size > 16)) {
                pass_on_stack = TRUE;
        }
#else
        if (!sig->pinvoke) {
                pass_on_stack = TRUE;
        }
#endif

        /* If this struct can't be split up naturally into 8-byte */
        /* chunks (registers), pass it on the stack.              */
        if (sig->pinvoke && !pass_on_stack) {
                guint32 align;
                guint32 field_size;

                info = mono_marshal_load_type_info (klass);
                g_assert (info);

                /*
                 * Collect field information recursively to be able to
                 * handle nested structures.
                 */
                nfields = count_fields_nested (klass);
                fields = g_new0 (MonoMarshalField, nfields);
                collect_field_info_nested (klass, fields, 0, 0);

                for (i = 0; i < nfields; ++i) {
                        field_size = mono_marshal_type_size (fields [i].field->type,
                                                           fields [i].mspec,
                                                           &align, TRUE, klass->unicode);
                        if ((fields [i].offset < 8) && (fields [i].offset + field_size) > 8) {
                                pass_on_stack = TRUE;
                                break;
                        }
                }
        }

#ifndef TARGET_WIN32
        if (size == 0) {
                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                return;
        }
#endif

        if (pass_on_stack) {
                /* Always pass in memory */
                ainfo->offset = *stack_size;
                *stack_size += ALIGN_TO (size, 8);
                ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;

                g_free (fields);
                return;
        }

        /* FIXME: Handle structs smaller than 8 bytes */
        //if ((size % 8) != 0)
        //      NOT_IMPLEMENTED;

        if (size > 8)
                nquads = 2;
        else
                nquads = 1;

        if (!sig->pinvoke) {
                int n = mono_class_value_size (klass, NULL);

                quadsize [0] = n >= 8 ? 8 : n;
                quadsize [1] = n >= 8 ? MAX (n - 8, 8) : 0;

                /* Always pass in 1 or 2 integer registers */
                args [0] = ARG_CLASS_INTEGER;
                args [1] = ARG_CLASS_INTEGER;
                /* Only the simplest cases are supported */
                if (is_return && nquads != 1) {
                        args [0] = ARG_CLASS_MEMORY;
                        args [1] = ARG_CLASS_MEMORY;
                }
        } else {
                /*
                 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
                 * The X87 and SSEUP stuff is left out since there are no such types in
                 * the CLR.
                 */
                g_assert (info);

                if (!fields) {
                        ainfo->storage = ArgValuetypeInReg;
                        ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                        return;
                }

#ifndef TARGET_WIN32
                if (info->native_size > 16) {
                        ainfo->offset = *stack_size;
                        *stack_size += ALIGN_TO (info->native_size, 8);
                        ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;

                        g_free (fields);
                        return;
                }
#else
                switch (info->native_size) {
                case 1: case 2: case 4: case 8:
                        break;
                default:
                        if (is_return) {
                                ainfo->storage = ArgValuetypeAddrInIReg;
                                ainfo->offset = *stack_size;
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        }
                        else {
                                ainfo->storage = ArgValuetypeAddrInIReg;

                                if (*gr < PARAM_REGS) {
                                        ainfo->pair_storage [0] = ArgInIReg;
                                        ainfo->pair_regs [0] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                else {
                                        ainfo->pair_storage [0] = ArgOnStack;
                                        ainfo->offset = *stack_size;
                                        *stack_size += 8;
                                }
                        }

                        g_free (fields);
                        return;
                }
#endif

                args [0] = ARG_CLASS_NO_CLASS;
                args [1] = ARG_CLASS_NO_CLASS;
                for (quad = 0; quad < nquads; ++quad) {
                        int size;
                        guint32 align;
                        ArgumentClass class1;

                        if (nfields == 0)
                                class1 = ARG_CLASS_MEMORY;
                        else
                                class1 = ARG_CLASS_NO_CLASS;
                        for (i = 0; i < nfields; ++i) {
                                size = mono_marshal_type_size (fields [i].field->type,
                                                                                           fields [i].mspec,
                                                                                           &align, TRUE, klass->unicode);
                                if ((fields [i].offset < 8) && (fields [i].offset + size) > 8) {
                                        /* Unaligned field */
                                        NOT_IMPLEMENTED;
                                }

                                /* Skip fields in other quad */
                                if ((quad == 0) && (fields [i].offset >= 8))
                                        continue;
                                if ((quad == 1) && (fields [i].offset < 8))
                                        continue;

                                /* How far into this quad this data extends.*/
                                /* (8 is size of quad) */
                                quadsize [quad] = fields [i].offset + size - (quad * 8);

                                class1 = merge_argument_class_from_type (fields [i].field->type, class1);
                        }
                        g_assert (class1 != ARG_CLASS_NO_CLASS);
                        args [quad] = class1;
                }
        }

        g_free (fields);

        /* Post merger cleanup */
        if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
                args [0] = args [1] = ARG_CLASS_MEMORY;

        /* Allocate registers */
        {
                int orig_gr = *gr;
                int orig_fr = *fr;

                while (quadsize [0] != 1 && quadsize [0] != 2 && quadsize [0] != 4 && quadsize [0] != 8)
                        quadsize [0] ++;
                while (quadsize [1] != 1 && quadsize [1] != 2 && quadsize [1] != 4 && quadsize [1] != 8)
                        quadsize [1] ++;

                ainfo->storage = ArgValuetypeInReg;
                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
                g_assert (quadsize [0] <= 8);
                g_assert (quadsize [1] <= 8);
                ainfo->pair_size [0] = quadsize [0];
                ainfo->pair_size [1] = quadsize [1];
                ainfo->nregs = nquads;
                for (quad = 0; quad < nquads; ++quad) {
                        switch (args [quad]) {
                        case ARG_CLASS_INTEGER:
                                if (*gr >= PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        ainfo->pair_storage [quad] = ArgInIReg;
                                        if (is_return)
                                                ainfo->pair_regs [quad] = return_regs [*gr];
                                        else
                                                ainfo->pair_regs [quad] = param_regs [*gr];
                                        (*gr) ++;
                                }
                                break;
                        case ARG_CLASS_SSE:
                                if (*fr >= FLOAT_PARAM_REGS)
                                        args [quad] = ARG_CLASS_MEMORY;
                                else {
                                        if (quadsize [quad] <= 4)
                                                ainfo->pair_storage [quad] = ArgInFloatSSEReg;
                                        else ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
                                        ainfo->pair_regs [quad] = *fr;
                                        (*fr) ++;
                                }
                                break;
                        case ARG_CLASS_MEMORY:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }

                if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
                        /* Revert possible register assignments */
                        *gr = orig_gr;
                        *fr = orig_fr;

                        ainfo->offset = *stack_size;
                        if (sig->pinvoke)
                                *stack_size += ALIGN_TO (info->native_size, 8);
                        else
                                *stack_size += nquads * sizeof (mgreg_t);
                        ainfo->storage = is_return ? ArgValuetypeAddrInIReg : ArgOnStack;
                }
        }
}
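/*
 * Illustrative note (not from the original source): for a pinvoke struct
 * such as { double x; gint64 y; } (16 bytes), the loop above classifies the
 * first quad as SSE and the second as INTEGER, so the value is split across
 * an XMM register and a general purpose register; anything larger than 16
 * bytes takes the memory path instead.
 */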

/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
{
        guint32 i, gr, fr, pstart;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;
        gboolean is_pinvoke = sig->pinvoke;

        if (mp)
                cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
        else
                cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        cinfo->nargs = n;

        gr = 0;
        fr = 0;

#ifdef TARGET_WIN32
        /* Reserve space where the callee can save the argument registers */
        stack_size = 4 * sizeof (mgreg_t);
#endif

        /* return value */
        ret_type = mini_get_underlying_type (sig->ret);
        switch (ret_type->type) {
        case MONO_TYPE_I1:
        case MONO_TYPE_U1:
        case MONO_TYPE_I2:
        case MONO_TYPE_U2:
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
        case MONO_TYPE_I:
        case MONO_TYPE_U:
        case MONO_TYPE_PTR:
        case MONO_TYPE_FNPTR:
        case MONO_TYPE_CLASS:
        case MONO_TYPE_OBJECT:
        case MONO_TYPE_SZARRAY:
        case MONO_TYPE_ARRAY:
        case MONO_TYPE_STRING:
                cinfo->ret.storage = ArgInIReg;
                cinfo->ret.reg = AMD64_RAX;
                break;
        case MONO_TYPE_U8:
        case MONO_TYPE_I8:
                cinfo->ret.storage = ArgInIReg;
                cinfo->ret.reg = AMD64_RAX;
                break;
        case MONO_TYPE_R4:
                cinfo->ret.storage = ArgInFloatSSEReg;
                cinfo->ret.reg = AMD64_XMM0;
                break;
        case MONO_TYPE_R8:
                cinfo->ret.storage = ArgInDoubleSSEReg;
                cinfo->ret.reg = AMD64_XMM0;
                break;
        case MONO_TYPE_GENERICINST:
                if (!mono_type_generic_inst_is_valuetype (ret_type)) {
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = AMD64_RAX;
                        break;
                }
                if (mini_is_gsharedvt_type (ret_type)) {
                        cinfo->ret.storage = ArgValuetypeAddrInIReg;
                        break;
                }
                /* fall through */
#if defined( __native_client_codegen__ )
        case MONO_TYPE_TYPEDBYREF:
#endif
        case MONO_TYPE_VALUETYPE: {
                guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                add_valuetype (sig, &cinfo->ret, ret_type, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                g_assert (cinfo->ret.storage != ArgInIReg);
                break;
        }
#if !defined( __native_client_codegen__ )
        case MONO_TYPE_TYPEDBYREF:
                /* Same as a valuetype with size 24 */
                cinfo->ret.storage = ArgValuetypeAddrInIReg;
                break;
#endif
        case MONO_TYPE_VAR:
        case MONO_TYPE_MVAR:
                g_assert (mini_is_gsharedvt_type (ret_type));
                cinfo->ret.storage = ArgValuetypeAddrInIReg;
                break;
        case MONO_TYPE_VOID:
                break;
        default:
                g_error ("Can't handle as return value 0x%x", ret_type->type);
        }

        pstart = 0;
        /*
         * To simplify get_this_arg_reg () and LLVM integration, emit the vret arg after
         * the first argument, allowing 'this' to be always passed in the first arg reg.
         * Also do this if the first argument is a reference type, since virtual calls
         * are sometimes made using calli without sig->hasthis set, like in the delegate
         * invoke wrappers.
         */
        if (cinfo->ret.storage == ArgValuetypeAddrInIReg && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_get_underlying_type (sig->params [0]))))) {
                if (sig->hasthis) {
                        add_general (&gr, &stack_size, cinfo->args + 0);
                } else {
                        add_general (&gr, &stack_size, &cinfo->args [sig->hasthis + 0]);
                        pstart = 1;
                }
                add_general (&gr, &stack_size, &cinfo->ret);
                cinfo->ret.storage = ArgValuetypeAddrInIReg;
                cinfo->vret_arg_index = 1;
        } else {
                /* this */
                if (sig->hasthis)
                        add_general (&gr, &stack_size, cinfo->args + 0);

                if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
                        add_general (&gr, &stack_size, &cinfo->ret);
                        cinfo->ret.storage = ArgValuetypeAddrInIReg;
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = pstart; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

#ifdef TARGET_WIN32
                /* The float param registers and other param registers must be the same index on Windows x64. */
                if (gr > fr)
                        fr = gr;
                else if (fr > gr)
                        gr = fr;
#endif

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /*
                         * Prevent implicit arguments + the sig cookie from being passed
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                ptype = mini_get_underlying_type (sig->params [i]);
                switch (ptype->type) {
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (ptype)) {
                                add_general (&gr, &stack_size, ainfo);
                                break;
                        }
                        if (mini_is_gsharedvt_type (ptype)) {
                                /* gsharedvt arguments are passed by ref */
                                add_general (&gr, &stack_size, ainfo);
                                if (ainfo->storage == ArgInIReg)
                                        ainfo->storage = ArgGSharedVtInReg;
                                else
                                        ainfo->storage = ArgGSharedVtOnStack;
                                break;
                        }
                        /* fall through */
                case MONO_TYPE_VALUETYPE:
                case MONO_TYPE_TYPEDBYREF:
                        add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                case MONO_TYPE_VAR:
                case MONO_TYPE_MVAR:
                        /* gsharedvt arguments are passed by ref */
                        g_assert (mini_is_gsharedvt_type (ptype));
                        add_general (&gr, &stack_size, ainfo);
                        if (ainfo->storage == ArgInIReg)
                                ainfo->storage = ArgGSharedVtInReg;
                        else
                                ainfo->storage = ArgGSharedVtOnStack;
                        break;
                default:
                        g_assert_not_reached ();
                }
        }

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;

                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}
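/*
 * Illustrative sketch (not part of the original source): inspecting the
 * computed layout for a signature; "sig" is assumed to be a valid
 * MonoMethodSignature obtained elsewhere.
 */
#if 0
CallInfo *cinfo = get_call_info (NULL, sig);
int i;

for (i = 0; i < cinfo->nargs; ++i)
        g_print ("arg %d: storage %d reg %d offset %d\n",
                         i, cinfo->args [i].storage, cinfo->args [i].reg, cinfo->args [i].offset);
g_free (cinfo);
#endif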

/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k;
        CallInfo *cinfo = get_call_info (NULL, csig);
        guint32 args_size = cinfo->stack_usage;

        /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
        if (csig->hasthis) {
                arg_info [0].offset = 0;
        }

        for (k = 0; k < param_count; k++) {
                arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
                /* FIXME: */
                arg_info [k + 1].size = 0;
        }

        g_free (cinfo);

        return args_size;
}

gboolean
mono_arch_tail_call_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig)
{
        CallInfo *c1, *c2;
        gboolean res;
        MonoType *callee_ret;

        c1 = get_call_info (NULL, caller_sig);
        c2 = get_call_info (NULL, callee_sig);
        res = c1->stack_usage >= c2->stack_usage;
        callee_ret = mini_get_underlying_type (callee_sig->ret);
        if (callee_ret && MONO_TYPE_ISSTRUCT (callee_ret) && c2->ret.storage != ArgValuetypeInReg)
                /* An address on the callee's stack is passed as the first argument */
                res = FALSE;

        g_free (c1);
        g_free (c2);

        return res;
}

/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
#ifndef _MSC_VER
        guint16 fpcw;

        /* spec compliance requires running with double precision */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
        /* TODO: This is crashing on Win64 right now.
         * _control87 (_PC_53, MCW_PC);
         */
#endif
}
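/*
 * Editorial note (not from the original source): the fnstcw/fldcw pair above
 * rewrites the x87 control word so its precision-control field selects
 * 53-bit (double) precision, matching the double precision semantics the
 * runtime requires.
 */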

/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
        int flags;

        mono_mutex_init_recursive (&mini_arch_mutex);
#if defined(__native_client_codegen__)
        mono_native_tls_alloc (&nacl_instruction_depth, NULL);
        mono_native_tls_set_value (nacl_instruction_depth, (gpointer)0);
        mono_native_tls_alloc (&nacl_rex_tag, NULL);
        mono_native_tls_alloc (&nacl_legacy_prefix_tag, NULL);
#endif

#ifdef MONO_ARCH_NOMAP32BIT
        flags = MONO_MMAP_READ;
        /* amd64_mov_reg_imm () + amd64_mov_reg_membase () */
        breakpoint_size = 13;
        breakpoint_fault_size = 3;
#else
        flags = MONO_MMAP_READ|MONO_MMAP_32BIT;
        /* amd64_mov_reg_mem () */
        breakpoint_size = 8;
        breakpoint_fault_size = 8;
#endif

        /* amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R11, 0, 0, 4); */
        single_step_fault_size = 4;

        ss_trigger_page = mono_valloc (NULL, mono_pagesize (), flags);
        bp_trigger_page = mono_valloc (NULL, mono_pagesize (), flags);
        mono_mprotect (bp_trigger_page, mono_pagesize (), 0);

        mono_aot_register_jit_icall ("mono_amd64_throw_exception", mono_amd64_throw_exception);
        mono_aot_register_jit_icall ("mono_amd64_throw_corlib_exception", mono_amd64_throw_corlib_exception);
        mono_aot_register_jit_icall ("mono_amd64_resume_unwind", mono_amd64_resume_unwind);
        mono_aot_register_jit_icall ("mono_amd64_get_original_ip", mono_amd64_get_original_ip);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
        mono_mutex_destroy (&mini_arch_mutex);
#if defined(__native_client_codegen__)
        mono_native_tls_free (nacl_instruction_depth);
        mono_native_tls_free (nacl_rex_tag);
        mono_native_tls_free (nacl_legacy_prefix_tag);
#endif
}

/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizations (guint32 *exclude_mask)
{
        guint32 opts = 0;

        *exclude_mask = 0;

        if (mono_hwcap_x86_has_cmov) {
                opts |= MONO_OPT_CMOV;

                if (mono_hwcap_x86_has_fcmov)
                        opts |= MONO_OPT_FCMOV;
                else
                        *exclude_mask |= MONO_OPT_FCMOV;
        } else {
                *exclude_mask |= MONO_OPT_CMOV;
        }

        return opts;
}
1400
1401 /*
1402  * This function test for all SSE functions supported.
1403  *
1404  * Returns a bitmask corresponding to all supported versions.
1405  * 
1406  */
1407 guint32
1408 mono_arch_cpu_enumerate_simd_versions (void)
1409 {
1410         guint32 sse_opts = 0;
1411
1412         if (mono_hwcap_x86_has_sse1)
1413                 sse_opts |= SIMD_VERSION_SSE1;
1414
1415         if (mono_hwcap_x86_has_sse2)
1416                 sse_opts |= SIMD_VERSION_SSE2;
1417
1418         if (mono_hwcap_x86_has_sse3)
1419                 sse_opts |= SIMD_VERSION_SSE3;
1420
1421         if (mono_hwcap_x86_has_ssse3)
1422                 sse_opts |= SIMD_VERSION_SSSE3;
1423
1424         if (mono_hwcap_x86_has_sse41)
1425                 sse_opts |= SIMD_VERSION_SSE41;
1426
1427         if (mono_hwcap_x86_has_sse42)
1428                 sse_opts |= SIMD_VERSION_SSE42;
1429
1430         if (mono_hwcap_x86_has_sse4a)
1431                 sse_opts |= SIMD_VERSION_SSE4a;
1432
1433         return sse_opts;
1434 }
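
/*
 * Example (hypothetical caller): gating SSE4.1 specific code generation on
 * the returned bitmask:
 *
 *     if (mono_arch_cpu_enumerate_simd_versions () & SIMD_VERSION_SSE41)
 *             ... emit SSE4.1 opcodes ...
 */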
1435
1436 #ifndef DISABLE_JIT
1437
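/*
 * mono_arch_get_allocatable_int_vars:
 *
 *   Return the list of variables which can be allocated to an integer
 * register, i.e. live, register sized locals and arguments which are not
 * volatile and not accessed indirectly, sorted using mono_varlist_sort ().
 */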
1438 GList *
1439 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
1440 {
1441         GList *vars = NULL;
1442         int i;
1443
1444         for (i = 0; i < cfg->num_varinfo; i++) {
1445                 MonoInst *ins = cfg->varinfo [i];
1446                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
1447
1448                 /* unused vars */
1449                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
1450                         continue;
1451
1452                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
1453                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
1454                         continue;
1455
1456                 if (mono_is_regsize_var (ins->inst_vtype)) {
1457                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
1458                         g_assert (i == vmv->idx);
1459                         vars = g_list_prepend (vars, vmv);
1460                 }
1461         }
1462
1463         vars = mono_varlist_sort (cfg, vars, 0);
1464
1465         return vars;
1466 }
1467
1468 /**
1469  * mono_arch_compute_omit_fp:
1470  *
1471  *   Determine whether the frame pointer can be eliminated.
1472  */
1473 static void
1474 mono_arch_compute_omit_fp (MonoCompile *cfg)
1475 {
1476         MonoMethodSignature *sig;
1477         MonoMethodHeader *header;
1478         int i, locals_size;
1479         CallInfo *cinfo;
1480
1481         if (cfg->arch.omit_fp_computed)
1482                 return;
1483
1484         header = cfg->header;
1485
1486         sig = mono_method_signature (cfg->method);
1487
1488         if (!cfg->arch.cinfo)
1489                 cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
1490         cinfo = cfg->arch.cinfo;
1491
1492         /*
1493          * FIXME: Remove some of the restrictions.
1494          */
1495         cfg->arch.omit_fp = TRUE;
1496         cfg->arch.omit_fp_computed = TRUE;
1497
1498 #ifdef __native_client_codegen__
1499         /* NaCl modules may not change the value of RBP, so it cannot be */
1500         /* used as a normal register, but it can be used as a frame pointer*/
1501         cfg->disable_omit_fp = TRUE;
1502         cfg->arch.omit_fp = FALSE;
1503 #endif
1504
1505         if (cfg->disable_omit_fp)
1506                 cfg->arch.omit_fp = FALSE;
1507
1508         if (!debug_omit_fp ())
1509                 cfg->arch.omit_fp = FALSE;
1510         /*
1511         if (cfg->method->save_lmf)
1512                 cfg->arch.omit_fp = FALSE;
1513         */
1514         if (cfg->flags & MONO_CFG_HAS_ALLOCA)
1515                 cfg->arch.omit_fp = FALSE;
1516         if (header->num_clauses)
1517                 cfg->arch.omit_fp = FALSE;
1518         if (cfg->param_area)
1519                 cfg->arch.omit_fp = FALSE;
1520         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
1521                 cfg->arch.omit_fp = FALSE;
1522         if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
1523                 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
1524                 cfg->arch.omit_fp = FALSE;
1525         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1526                 ArgInfo *ainfo = &cinfo->args [i];
1527
1528                 if (ainfo->storage == ArgOnStack) {
1529                         /* 
1530                          * The stack offset can only be determined when the frame
1531                          * size is known.
1532                          */
1533                         cfg->arch.omit_fp = FALSE;
1534                 }
1535         }
1536
1537         locals_size = 0;
1538         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
1539                 MonoInst *ins = cfg->varinfo [i];
1540                 int ialign;
1541
1542                 locals_size += mono_type_size (ins->inst_vtype, &ialign);
1543         }
1544 }
1545
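/*
 * mono_arch_get_global_int_regs:
 *
 *   Return the list of integer registers usable for global register
 * allocation: the callee saved registers, plus RBP when the frame pointer
 * is omitted.
 */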
1546 GList *
1547 mono_arch_get_global_int_regs (MonoCompile *cfg)
1548 {
1549         GList *regs = NULL;
1550
1551         mono_arch_compute_omit_fp (cfg);
1552
1553         if (cfg->arch.omit_fp)
1554                 regs = g_list_prepend (regs, (gpointer)AMD64_RBP);
1555
1556         /* We use the callee saved registers for global allocation */
1557         regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
1558         regs = g_list_prepend (regs, (gpointer)AMD64_R12);
1559         regs = g_list_prepend (regs, (gpointer)AMD64_R13);
1560         regs = g_list_prepend (regs, (gpointer)AMD64_R14);
1561 #ifndef __native_client_codegen__
1562         regs = g_list_prepend (regs, (gpointer)AMD64_R15);
1563 #endif
1564 #ifdef TARGET_WIN32
1565         regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
1566         regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
1567 #endif
1568
1569         return regs;
1570 }
1571  
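/*
 * mono_arch_get_global_fp_regs:
 *
 *   Return the list of floating point registers usable for global register
 * allocation: all 16 XMM registers.
 */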
1572 GList*
1573 mono_arch_get_global_fp_regs (MonoCompile *cfg)
1574 {
1575         GList *regs = NULL;
1576         int i;
1577
1578         /* All XMM registers */
1579         for (i = 0; i < 16; ++i)
1580                 regs = g_list_prepend (regs, GINT_TO_POINTER (i));
1581
1582         return regs;
1583 }
1584
1585 GList*
1586 mono_arch_get_iregs_clobbered_by_call (MonoCallInst *call)
1587 {
1588         static GList *r = NULL;
1589
1590         if (r == NULL) {
1591                 GList *regs = NULL;
1592
1593                 regs = g_list_prepend (regs, (gpointer)AMD64_RBP);
1594                 regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
1595                 regs = g_list_prepend (regs, (gpointer)AMD64_R12);
1596                 regs = g_list_prepend (regs, (gpointer)AMD64_R13);
1597                 regs = g_list_prepend (regs, (gpointer)AMD64_R14);
1598 #ifndef __native_client_codegen__
1599                 regs = g_list_prepend (regs, (gpointer)AMD64_R15);
1600 #endif
1601
1602                 regs = g_list_prepend (regs, (gpointer)AMD64_R10);
1603                 regs = g_list_prepend (regs, (gpointer)AMD64_R9);
1604                 regs = g_list_prepend (regs, (gpointer)AMD64_R8);
1605                 regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
1606                 regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
1607                 regs = g_list_prepend (regs, (gpointer)AMD64_RDX);
1608                 regs = g_list_prepend (regs, (gpointer)AMD64_RCX);
1609                 regs = g_list_prepend (regs, (gpointer)AMD64_RAX);
1610
1611                 InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
1612         }
1613
1614         return r;
1615 }
1616
1617 GList*
1618 mono_arch_get_fregs_clobbered_by_call (MonoCallInst *call)
1619 {
1620         int i;
1621         static GList *r = NULL;
1622
1623         if (r == NULL) {
1624                 GList *regs = NULL;
1625
1626                 for (i = 0; i < AMD64_XMM_NREG; ++i)
1627                         regs = g_list_prepend (regs, GINT_TO_POINTER (MONO_MAX_IREGS + i));
1628
1629                 InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
1630         }
1631
1632         return r;
1633 }
1634
1635 /*
1636  * mono_arch_regalloc_cost:
1637  *
1638  *  Return the cost, in number of memory references, of the action of 
1639  * allocating the variable VMV into a register during global register
1640  * allocation.
1641  */
1642 guint32
1643 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
1644 {
1645         MonoInst *ins = cfg->varinfo [vmv->idx];
1646
1647         if (cfg->method->save_lmf)
1648                 /* The register is already saved */
1649                 /* subtract 1 for the invisible store in the prolog */
1650                 return (ins->opcode == OP_ARG) ? 0 : 1;
1651         else
1652                 /* push+pop */
1653                 return (ins->opcode == OP_ARG) ? 1 : 2;
1654 }
1655
1656 /*
1657  * mono_arch_fill_argument_info:
1658  *
1659  *   Populate cfg->args, cfg->ret and cfg->vret_addr with information about the arguments
1660  * of the method.
1661  */
1662 void
1663 mono_arch_fill_argument_info (MonoCompile *cfg)
1664 {
1665         MonoType *sig_ret;
1666         MonoMethodSignature *sig;
1667         MonoInst *ins;
1668         int i;
1669         CallInfo *cinfo;
1670
1671         sig = mono_method_signature (cfg->method);
1672
1673         cinfo = cfg->arch.cinfo;
1674         sig_ret = mini_get_underlying_type (sig->ret);
1675
1676         /*
1677          * Contrary to mono_arch_allocate_vars (), the information should describe
1678          * where the arguments are at the beginning of the method, not where they can be 
1679          * accessed during the execution of the method. The latter makes no sense for the
1680          * global register allocator, since a variable can be in more than one location.
1681          */
1682         if (sig_ret->type != MONO_TYPE_VOID) {
1683                 switch (cinfo->ret.storage) {
1684                 case ArgInIReg:
1685                 case ArgInFloatSSEReg:
1686                 case ArgInDoubleSSEReg:
1687                         if ((MONO_TYPE_ISSTRUCT (sig_ret) && !mono_class_from_mono_type (sig_ret)->enumtype) || ((sig_ret->type == MONO_TYPE_TYPEDBYREF) && cinfo->ret.storage == ArgValuetypeAddrInIReg)) {
1688                                 cfg->vret_addr->opcode = OP_REGVAR;
1689                                 cfg->vret_addr->inst_c0 = cinfo->ret.reg;
1690                         }
1691                         else {
1692                                 cfg->ret->opcode = OP_REGVAR;
1693                                 cfg->ret->inst_c0 = cinfo->ret.reg;
1694                         }
1695                         break;
1696                 case ArgValuetypeInReg:
1697                         cfg->ret->opcode = OP_REGOFFSET;
1698                         cfg->ret->inst_basereg = -1;
1699                         cfg->ret->inst_offset = -1;
1700                         break;
1701                 default:
1702                         g_assert_not_reached ();
1703                 }
1704         }
1705
1706         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1707                 ArgInfo *ainfo = &cinfo->args [i];
1708
1709                 ins = cfg->args [i];
1710
1711                 switch (ainfo->storage) {
1712                 case ArgInIReg:
1713                 case ArgInFloatSSEReg:
1714                 case ArgInDoubleSSEReg:
1715                         ins->opcode = OP_REGVAR;
1716                         ins->inst_c0 = ainfo->reg;
1717                         break;
1718                 case ArgOnStack:
1719                         ins->opcode = OP_REGOFFSET;
1720                         ins->inst_basereg = -1;
1721                         ins->inst_offset = -1;
1722                         break;
1723                 case ArgValuetypeInReg:
1724                         /* Dummy */
1725                         ins->opcode = OP_NOP;
1726                         break;
1727                 default:
1728                         g_assert_not_reached ();
1729                 }
1730         }
1731 }
1732  
1733 void
1734 mono_arch_allocate_vars (MonoCompile *cfg)
1735 {
1736         MonoType *sig_ret;
1737         MonoMethodSignature *sig;
1738         MonoInst *ins;
1739         int i, offset;
1740         guint32 locals_stack_size, locals_stack_align;
1741         gint32 *offsets;
1742         CallInfo *cinfo;
1743
1744         sig = mono_method_signature (cfg->method);
1745
1746         cinfo = cfg->arch.cinfo;
1747         sig_ret = mini_get_underlying_type (sig->ret);
1748
1749         mono_arch_compute_omit_fp (cfg);
1750
1751         /*
1752          * We use the ABI calling conventions for managed code as well.
1753          * Exception: valuetypes are only sometimes passed or returned in registers.
1754          */
1755
1756         /*
1757          * The stack looks like this:
1758          * <incoming arguments passed on the stack>
1759          * <return value>
1760          * <lmf/caller saved registers>
1761          * <locals>
1762          * <spill area>
1763          * <localloc area>  -> grows dynamically
1764          * <params area>
1765          */
1766
1767         if (cfg->arch.omit_fp) {
1768                 cfg->flags |= MONO_CFG_HAS_SPILLUP;
1769                 cfg->frame_reg = AMD64_RSP;
1770                 offset = 0;
1771         } else {
1772                 /* Locals are allocated backwards from %fp */
1773                 cfg->frame_reg = AMD64_RBP;
1774                 offset = 0;
1775         }
1776
1777         cfg->arch.saved_iregs = cfg->used_int_regs;
1778         if (cfg->method->save_lmf)
1779                 /* Save all callee-saved registers normally, and restore them when unwinding through an LMF */
1780                 cfg->arch.saved_iregs |= (1 << AMD64_RBX) | (1 << AMD64_R12) | (1 << AMD64_R13) | (1 << AMD64_R14) | (1 << AMD64_R15);
1781
1782         if (cfg->arch.omit_fp)
1783                 cfg->arch.reg_save_area_offset = offset;
1784         /* Reserve space for callee saved registers */
1785         for (i = 0; i < AMD64_NREG; ++i)
1786                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
1787                         offset += sizeof(mgreg_t);
1788                 }
1789         if (!cfg->arch.omit_fp)
1790                 cfg->arch.reg_save_area_offset = -offset;
1791
1792         if (sig_ret->type != MONO_TYPE_VOID) {
1793                 switch (cinfo->ret.storage) {
1794                 case ArgInIReg:
1795                 case ArgInFloatSSEReg:
1796                 case ArgInDoubleSSEReg:
1797                         cfg->ret->opcode = OP_REGVAR;
1798                         cfg->ret->inst_c0 = cinfo->ret.reg;
1799                         break;
1800                 case ArgValuetypeAddrInIReg:
1801                         /* The register is volatile */
1802                         cfg->vret_addr->opcode = OP_REGOFFSET;
1803                         cfg->vret_addr->inst_basereg = cfg->frame_reg;
1804                         if (cfg->arch.omit_fp) {
1805                                 cfg->vret_addr->inst_offset = offset;
1806                                 offset += 8;
1807                         } else {
1808                                 offset += 8;
1809                                 cfg->vret_addr->inst_offset = -offset;
1810                         }
1811                         if (G_UNLIKELY (cfg->verbose_level > 1)) {
1812                                 printf ("vret_addr =");
1813                                 mono_print_ins (cfg->vret_addr);
1814                         }
1815                         break;
1816                 case ArgValuetypeInReg:
1817                         /* Allocate a local to hold the result, the epilog will copy it to the correct place */
1818                         cfg->ret->opcode = OP_REGOFFSET;
1819                         cfg->ret->inst_basereg = cfg->frame_reg;
1820                         if (cfg->arch.omit_fp) {
1821                                 cfg->ret->inst_offset = offset;
1822                                 offset += cinfo->ret.pair_storage [1] == ArgNone ? 8 : 16;
1823                         } else {
1824                                 offset += cinfo->ret.pair_storage [1] == ArgNone ? 8 : 16;
1825                                 cfg->ret->inst_offset = - offset;
1826                         }
1827                         break;
1828                 default:
1829                         g_assert_not_reached ();
1830                 }
1831                 cfg->ret->dreg = cfg->ret->inst_c0;
1832         }
1833
1834         /* Allocate locals */
1835         offsets = mono_allocate_stack_slots (cfg, cfg->arch.omit_fp ? FALSE: TRUE, &locals_stack_size, &locals_stack_align);
1836         if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
1837                 char *mname = mono_method_full_name (cfg->method, TRUE);
1838                 cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
1839                 cfg->exception_message = g_strdup_printf ("Method %s stack is too big.", mname);
1840                 g_free (mname);
1841                 return;
1842         }
1843                 
1844         if (locals_stack_align) {
1845                 offset += (locals_stack_align - 1);
1846                 offset &= ~(locals_stack_align - 1);
1847         }
1848         if (cfg->arch.omit_fp) {
1849                 cfg->locals_min_stack_offset = offset;
1850                 cfg->locals_max_stack_offset = offset + locals_stack_size;
1851         } else {
1852                 cfg->locals_min_stack_offset = - (offset + locals_stack_size);
1853                 cfg->locals_max_stack_offset = - offset;
1854         }
1855                 
1856         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
1857                 if (offsets [i] != -1) {
1858                         MonoInst *ins = cfg->varinfo [i];
1859                         ins->opcode = OP_REGOFFSET;
1860                         ins->inst_basereg = cfg->frame_reg;
1861                         if (cfg->arch.omit_fp)
1862                                 ins->inst_offset = (offset + offsets [i]);
1863                         else
1864                                 ins->inst_offset = - (offset + offsets [i]);
1865                         //printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
1866                 }
1867         }
1868         offset += locals_stack_size;
1869
1870         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
1871                 g_assert (!cfg->arch.omit_fp);
1872                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
1873                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
1874         }
1875
1876         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1877                 ins = cfg->args [i];
1878                 if (ins->opcode != OP_REGVAR) {
1879                         ArgInfo *ainfo = &cinfo->args [i];
1880                         gboolean inreg = TRUE;
1881
1882                         /* FIXME: Allocate volatile arguments to registers */
1883                         if (ins->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
1884                                 inreg = FALSE;
1885
1886                         /* 
1887                          * Under AMD64, all registers used to pass arguments to functions
1888                          * are volatile across calls.
1889                          * FIXME: Optimize this.
1890                          */
1891                         if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg) || (ainfo->storage == ArgGSharedVtInReg))
1892                                 inreg = FALSE;
1893
1894                         ins->opcode = OP_REGOFFSET;
1895
1896                         switch (ainfo->storage) {
1897                         case ArgInIReg:
1898                         case ArgInFloatSSEReg:
1899                         case ArgInDoubleSSEReg:
1900                         case ArgGSharedVtInReg:
1901                                 if (inreg) {
1902                                         ins->opcode = OP_REGVAR;
1903                                         ins->dreg = ainfo->reg;
1904                                 }
1905                                 break;
1906                         case ArgOnStack:
1907                         case ArgGSharedVtOnStack:
1908                                 g_assert (!cfg->arch.omit_fp);
1909                                 ins->opcode = OP_REGOFFSET;
1910                                 ins->inst_basereg = cfg->frame_reg;
1911                                 ins->inst_offset = ainfo->offset + ARGS_OFFSET;
1912                                 break;
1913                         case ArgValuetypeInReg:
1914                                 break;
1915                         case ArgValuetypeAddrInIReg: {
1916                                 MonoInst *indir;
1917                                 g_assert (!cfg->arch.omit_fp);
1918                                 
1919                                 MONO_INST_NEW (cfg, indir, 0);
1920                                 indir->opcode = OP_REGOFFSET;
1921                                 if (ainfo->pair_storage [0] == ArgInIReg) {
1922                                         indir->inst_basereg = cfg->frame_reg;
1923                                         offset = ALIGN_TO (offset, sizeof (gpointer));
1924                                         offset += (sizeof (gpointer));
1925                                         indir->inst_offset = - offset;
1926                                 }
1927                                 else {
1928                                         indir->inst_basereg = cfg->frame_reg;
1929                                         indir->inst_offset = ainfo->offset + ARGS_OFFSET;
1930                                 }
1931                                 
1932                                 ins->opcode = OP_VTARG_ADDR;
1933                                 ins->inst_left = indir;
1934                                 
1935                                 break;
1936                         }
1937                         default:
1938                                 NOT_IMPLEMENTED;
1939                         }
1940
1941                         if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg)) {
1942                                 ins->opcode = OP_REGOFFSET;
1943                                 ins->inst_basereg = cfg->frame_reg;
1944                                 /* These arguments are saved to the stack in the prolog */
1945                                 offset = ALIGN_TO (offset, sizeof(mgreg_t));
1946                                 if (cfg->arch.omit_fp) {
1947                                         ins->inst_offset = offset;
1948                                         offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t);
1949                                         // Arguments are not yet supported by the stack map creation code
1950                                         //cfg->locals_max_stack_offset = MAX (cfg->locals_max_stack_offset, offset);
1951                                 } else {
1952                                         offset += (ainfo->storage == ArgValuetypeInReg) ? ainfo->nregs * sizeof (mgreg_t) : sizeof (mgreg_t);
1953                                         ins->inst_offset = - offset;
1954                                         //cfg->locals_min_stack_offset = MIN (cfg->locals_min_stack_offset, offset);
1955                                 }
1956                         }
1957                 }
1958         }
1959
1960         cfg->stack_offset = offset;
1961 }
1962
1963 void
1964 mono_arch_create_vars (MonoCompile *cfg)
1965 {
1966         MonoMethodSignature *sig;
1967         CallInfo *cinfo;
1968         MonoType *sig_ret;
1969
1970         sig = mono_method_signature (cfg->method);
1971
1972         if (!cfg->arch.cinfo)
1973                 cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
1974         cinfo = cfg->arch.cinfo;
1975
1976         if (cinfo->ret.storage == ArgValuetypeInReg)
1977                 cfg->ret_var_is_local = TRUE;
1978
1979         sig_ret = mini_get_underlying_type (sig->ret);
1980         if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
1981                 cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
1982                 if (G_UNLIKELY (cfg->verbose_level > 1)) {
1983                         printf ("vret_addr = ");
1984                         mono_print_ins (cfg->vret_addr);
1985                 }
1986         }
1987
1988         if (cfg->gen_sdb_seq_points) {
1989                 MonoInst *ins;
1990
1991                 if (cfg->compile_aot) {
1992                         ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1993                         ins->flags |= MONO_INST_VOLATILE;
1994                         cfg->arch.seq_point_info_var = ins;
1995
1996                         ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1997                         ins->flags |= MONO_INST_VOLATILE;
1998                         cfg->arch.ss_tramp_var = ins;
1999                 }
2000
2001                 ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
2002                 ins->flags |= MONO_INST_VOLATILE;
2003                 cfg->arch.ss_trigger_page_var = ins;
2004         }
2005
2006         if (cfg->method->save_lmf)
2007                 cfg->create_lmf_var = TRUE;
2008
2009         if (cfg->method->save_lmf) {
2010                 cfg->lmf_ir = TRUE;
2011 #if !defined(TARGET_WIN32)
2012                 if (mono_get_lmf_tls_offset () != -1 && !optimize_for_xen)
2013                         cfg->lmf_ir_mono_lmf = TRUE;
2014 #endif
2015         }
2016 }
2017
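/*
 * add_outarg_reg:
 *
 *   Emit a move of TREE's result into a freshly allocated vreg, and register
 * that vreg as the outgoing argument passed in the hard register REG of CALL.
 */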
2018 static void
2019 add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *tree)
2020 {
2021         MonoInst *ins;
2022
2023         switch (storage) {
2024         case ArgInIReg:
2025                 MONO_INST_NEW (cfg, ins, OP_MOVE);
2026                 ins->dreg = mono_alloc_ireg_copy (cfg, tree->dreg);
2027                 ins->sreg1 = tree->dreg;
2028                 MONO_ADD_INS (cfg->cbb, ins);
2029                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, FALSE);
2030                 break;
2031         case ArgInFloatSSEReg:
2032                 MONO_INST_NEW (cfg, ins, OP_AMD64_SET_XMMREG_R4);
2033                 ins->dreg = mono_alloc_freg (cfg);
2034                 ins->sreg1 = tree->dreg;
2035                 MONO_ADD_INS (cfg->cbb, ins);
2036
2037                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
2038                 break;
2039         case ArgInDoubleSSEReg:
2040                 MONO_INST_NEW (cfg, ins, OP_FMOVE);
2041                 ins->dreg = mono_alloc_freg (cfg);
2042                 ins->sreg1 = tree->dreg;
2043                 MONO_ADD_INS (cfg->cbb, ins);
2044
2045                 mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, reg, TRUE);
2046
2047                 break;
2048         default:
2049                 g_assert_not_reached ();
2050         }
2051 }
2052
2053 static int
2054 arg_storage_to_load_membase (ArgStorage storage)
2055 {
2056         switch (storage) {
2057         case ArgInIReg:
2058 #if defined(__mono_ilp32__)
2059                 return OP_LOADI8_MEMBASE;
2060 #else
2061                 return OP_LOAD_MEMBASE;
2062 #endif
2063         case ArgInDoubleSSEReg:
2064                 return OP_LOADR8_MEMBASE;
2065         case ArgInFloatSSEReg:
2066                 return OP_LOADR4_MEMBASE;
2067         default:
2068                 g_assert_not_reached ();
2069         }
2070
2071         return -1;
2072 }
2073
2074 static void
2075 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
2076 {
2077         MonoMethodSignature *tmp_sig;
2078         int sig_reg;
2079
2080         if (call->tail_call)
2081                 NOT_IMPLEMENTED;
2082
2083         g_assert (cinfo->sig_cookie.storage == ArgOnStack);
2084                         
2085         /*
2086          * mono_ArgIterator_Setup assumes the signature cookie is 
2087          * passed first and all the arguments which were before it are
2088          * passed on the stack after the signature. So compensate by 
2089          * passing a different signature.
2090          */
2091         tmp_sig = mono_metadata_signature_dup_full (cfg->method->klass->image, call->signature);
2092         tmp_sig->param_count -= call->signature->sentinelpos;
2093         tmp_sig->sentinelpos = 0;
2094         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
2095
2096         sig_reg = mono_alloc_ireg (cfg);
2097         MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig);
2098
2099         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, cinfo->sig_cookie.offset, sig_reg);
2100 }
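
/*
 * For example (illustrative values): for a vararg signature with
 * sentinelpos == 2 called with four actual parameters, TMP_SIG describes
 * only the last two, so mono_ArgIterator_Setup sees exactly the implicit
 * arguments which follow the cookie on the stack.
 */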
2101
2102 #ifdef ENABLE_LLVM
2103 static inline LLVMArgStorage
2104 arg_storage_to_llvm_arg_storage (MonoCompile *cfg, ArgStorage storage)
2105 {
2106         switch (storage) {
2107         case ArgInIReg:
2108                 return LLVMArgInIReg;
2109         case ArgNone:
2110                 return LLVMArgNone;
2111         case ArgGSharedVtInReg:
2112         case ArgGSharedVtOnStack:
2113                 return LLVMArgGSharedVt;
2114         default:
2115                 g_assert_not_reached ();
2116                 return LLVMArgNone;
2117         }
2118 }
2119
2120 LLVMCallInfo*
2121 mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
2122 {
2123         int i, n;
2124         CallInfo *cinfo;
2125         ArgInfo *ainfo;
2126         int j;
2127         LLVMCallInfo *linfo;
2128         MonoType *t, *sig_ret;
2129
2130         n = sig->param_count + sig->hasthis;
2131         sig_ret = mini_get_underlying_type (sig->ret);
2132
2133         cinfo = get_call_info (cfg->mempool, sig);
2134
2135         linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
2136
2137         /*
2138          * LLVM always uses the native ABI while we use our own ABI, the
2139          * only difference is the handling of vtypes:
2140          * - we only pass/receive them in registers in some cases, and only 
2141          *   in 1 or 2 integer registers.
2142          */
2143         if (cinfo->ret.storage == ArgValuetypeInReg) {
2144                 if (sig->pinvoke) {
2145                         cfg->exception_message = g_strdup ("pinvoke + vtypes");
2146                         cfg->disable_llvm = TRUE;
2147                         return linfo;
2148                 }
2149
2150                 linfo->ret.storage = LLVMArgVtypeInReg;
2151                 for (j = 0; j < 2; ++j)
2152                         linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
2153         }
2154
2155         if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
2156                 /* Vtype returned using a hidden argument */
2157                 linfo->ret.storage = LLVMArgVtypeRetAddr;
2158                 linfo->vret_arg_index = cinfo->vret_arg_index;
2159         }
2160
2161         for (i = 0; i < n; ++i) {
2162                 ainfo = cinfo->args + i;
2163
2164                 if (i >= sig->hasthis)
2165                         t = sig->params [i - sig->hasthis];
2166                 else
2167                         t = &mono_defaults.int_class->byval_arg;
2168
2169                 linfo->args [i].storage = LLVMArgNone;
2170
2171                 switch (ainfo->storage) {
2172                 case ArgInIReg:
2173                         linfo->args [i].storage = LLVMArgInIReg;
2174                         break;
2175                 case ArgInDoubleSSEReg:
2176                 case ArgInFloatSSEReg:
2177                         linfo->args [i].storage = LLVMArgInFPReg;
2178                         break;
2179                 case ArgOnStack:
2180                         if (MONO_TYPE_ISSTRUCT (t)) {
2181                                 linfo->args [i].storage = LLVMArgVtypeByVal;
2182                         } else {
2183                                 linfo->args [i].storage = LLVMArgInIReg;
2184                                 if (!t->byref) {
2185                                         if (t->type == MONO_TYPE_R4)
2186                                                 linfo->args [i].storage = LLVMArgInFPReg;
2187                                         else if (t->type == MONO_TYPE_R8)
2188                                                 linfo->args [i].storage = LLVMArgInFPReg;
2189                                 }
2190                         }
2191                         break;
2192                 case ArgValuetypeInReg:
2193                         if (sig->pinvoke) {
2194                                 cfg->exception_message = g_strdup ("pinvoke + vtypes");
2195                                 cfg->disable_llvm = TRUE;
2196                                 return linfo;
2197                         }
2198
2199                         linfo->args [i].storage = LLVMArgVtypeInReg;
2200                         for (j = 0; j < 2; ++j)
2201                                 linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
2202                         break;
2203                 case ArgGSharedVtInReg:
2204                 case ArgGSharedVtOnStack:
2205                         linfo->args [i].storage = LLVMArgGSharedVt;
2206                         break;
2207                 default:
2208                         cfg->exception_message = g_strdup ("ainfo->storage");
2209                         cfg->disable_llvm = TRUE;
2210                         break;
2211                 }
2212         }
2213
2214         return linfo;
2215 }
2216 #endif
2217
2218 void
2219 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
2220 {
2221         MonoInst *arg, *in;
2222         MonoMethodSignature *sig;
2223         MonoType *sig_ret;
2224         int i, n;
2225         CallInfo *cinfo;
2226         ArgInfo *ainfo;
2227
2228         sig = call->signature;
2229         n = sig->param_count + sig->hasthis;
2230
2231         cinfo = get_call_info (cfg->mempool, sig);
2232
2233         sig_ret = sig->ret;
2234
2235         if (COMPILE_LLVM (cfg)) {
2236                 /* We shouldn't be called in the llvm case */
2237                 cfg->disable_llvm = TRUE;
2238                 return;
2239         }
2240
2241         /* 
2242          * Emit all arguments which are passed on the stack to prevent register
2243          * allocation problems.
2244          */
2245         for (i = 0; i < n; ++i) {
2246                 MonoType *t;
2247                 ainfo = cinfo->args + i;
2248
2249                 in = call->args [i];
2250
2251                 if (sig->hasthis && i == 0)
2252                         t = &mono_defaults.object_class->byval_arg;
2253                 else
2254                         t = sig->params [i - sig->hasthis];
2255
2256                 t = mini_get_underlying_type (t);
2257                 if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call) {
2258                         if (!t->byref) {
2259                                 if (t->type == MONO_TYPE_R4)
2260                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2261                                 else if (t->type == MONO_TYPE_R8)
2262                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2263                                 else
2264                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2265                         } else {
2266                                 MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, in->dreg);
2267                         }
2268                         if (cfg->compute_gc_maps) {
2269                                 MonoInst *def;
2270
2271                                 EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, ainfo->offset, t);
2272                         }
2273                 }
2274         }
2275
2276         /*
2277          * Emit all parameters passed in registers in non-reverse order for better readability
2278          * and to help the optimization in emit_prolog ().
2279          */
2280         for (i = 0; i < n; ++i) {
2281                 ainfo = cinfo->args + i;
2282
2283                 in = call->args [i];
2284
2285                 if (ainfo->storage == ArgInIReg)
2286                         add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
2287         }
2288
2289         for (i = n - 1; i >= 0; --i) {
2290                 MonoType *t;
2291
2292                 ainfo = cinfo->args + i;
2293
2294                 in = call->args [i];
2295
2296                 if (sig->hasthis && i == 0)
2297                         t = &mono_defaults.object_class->byval_arg;
2298                 else
2299                         t = sig->params [i - sig->hasthis];
2300                 t = mini_get_underlying_type (t);
2301
2302                 switch (ainfo->storage) {
2303                 case ArgInIReg:
2304                         /* Already done */
2305                         break;
2306                 case ArgInFloatSSEReg:
2307                 case ArgInDoubleSSEReg:
2308                         add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
2309                         break;
2310                 case ArgOnStack:
2311                 case ArgValuetypeInReg:
2312                 case ArgValuetypeAddrInIReg:
2313                 case ArgGSharedVtInReg:
2314                 case ArgGSharedVtOnStack: {
2315                         guint32 align;
2316                         guint32 size;
2317
2318                         if (ainfo->storage == ArgOnStack && !MONO_TYPE_ISSTRUCT (t) && !call->tail_call)
2319                                 /* Already emitted above */
2320                                 break;
2321                         if (ainfo->storage == ArgOnStack && call->tail_call) {
2322                                 MonoInst *call_inst = (MonoInst*)call;
2323                                 cfg->args [i]->flags |= MONO_INST_VOLATILE;
2324                                 EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
2325                                 break;
2326                         }
2327
2328                         if (t->type == MONO_TYPE_TYPEDBYREF) {
2329                                 size = sizeof (MonoTypedRef);
2330                                 align = sizeof (gpointer);
2331                         }
2332                         else {
2333                                 if (sig->pinvoke)
2334                                         size = mono_type_native_stack_size (t, &align);
2335                                 else {
2336                                         /*
2337                                          * Other backends use mono_type_stack_size (), but that
2338                                          * aligns the size to 8, which is larger than the size of
2339                                          * the source, leading to reads of invalid memory if the
2340                                          * source is at the end of address space.
2341                                          */
2342                                         size = mono_class_value_size (mono_class_from_mono_type (t), &align);
2343                                 }
2344                         }
2345
2346                         if (size >= 10000) {
2347                                 /* Avoid asserts in emit_memcpy () */
2348                                 cfg->exception_type = MONO_EXCEPTION_INVALID_PROGRAM;
2349                                 cfg->exception_message = g_strdup_printf ("Passing an argument of size '%d'.", size);
2350                                 /* Continue normally */
2351                         }
2352
2353                         if (size > 0) {
2354                                 MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
2355                                 arg->sreg1 = in->dreg;
2356                                 arg->klass = mono_class_from_mono_type (t);
2357                                 arg->backend.size = size;
2358                                 arg->inst_p0 = call;
2359                                 arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
2360                                 memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
2361
2362                                 MONO_ADD_INS (cfg->cbb, arg);
2363                         }
2364                         break;
2365                 }
2366                 default:
2367                         g_assert_not_reached ();
2368                 }
2369
2370                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos))
2371                         /* Emit the signature cookie just before the implicit arguments */
2372                         emit_sig_cookie (cfg, call, cinfo);
2373         }
2374
2375         /* Handle the case where there are no implicit arguments */
2376         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
2377                 emit_sig_cookie (cfg, call, cinfo);
2378
2379         switch (cinfo->ret.storage) {
2380         case ArgValuetypeInReg:
2381                 if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
2382                         /*
2383                          * Tell the JIT to use a more efficient calling convention: call using
2384                          * OP_CALL, compute the result location after the call, and save the
2385                          * result there.
2386                          */
2387                         call->vret_in_reg = TRUE;
2388                         /*
2389                          * Nullify the instruction computing the vret addr to enable
2390                          * future optimizations.
2391                          */
2392                         if (call->vret_var)
2393                                 NULLIFY_INS (call->vret_var);
2394                 } else {
2395                         if (call->tail_call)
2396                                 NOT_IMPLEMENTED;
2397                         /*
2398                          * The valuetype is in RAX:RDX after the call, need to be copied to
2399                          * the stack. Push the address here, so the call instruction can
2400                          * access it.
2401                          */
2402                         if (!cfg->arch.vret_addr_loc) {
2403                                 cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
2404                                 /* Prevent it from being register allocated or optimized away */
2405                                 ((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
2406                         }
2407
2408                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
2409                 }
2410                 break;
2411         case ArgValuetypeAddrInIReg: {
2412                 MonoInst *vtarg;
2413                 MONO_INST_NEW (cfg, vtarg, OP_MOVE);
2414                 vtarg->sreg1 = call->vret_var->dreg;
2415                 vtarg->dreg = mono_alloc_preg (cfg);
2416                 MONO_ADD_INS (cfg->cbb, vtarg);
2417
2418                 mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
2419                 break;
2420         }
2421         default:
2422                 break;
2423         }
2424
2425         if (cfg->method->save_lmf) {
2426                 MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF);
2427                 MONO_ADD_INS (cfg->cbb, arg);
2428         }
2429
2430         call->stack_usage = cinfo->stack_usage;
2431 }
2432
2433 void
2434 mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
2435 {
2436         MonoInst *arg;
2437         MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
2438         ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
2439         int size = ins->backend.size;
2440
2441         switch (ainfo->storage) {
2442         case ArgValuetypeInReg: {
2443                 MonoInst *load;
2444                 int part;
2445
2446                 for (part = 0; part < 2; ++part) {
2447                         if (ainfo->pair_storage [part] == ArgNone)
2448                                 continue;
2449
2450                         MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part]));
2451                         load->inst_basereg = src->dreg;
2452                         load->inst_offset = part * sizeof(mgreg_t);
2453
2454                         switch (ainfo->pair_storage [part]) {
2455                         case ArgInIReg:
2456                                 load->dreg = mono_alloc_ireg (cfg);
2457                                 break;
2458                         case ArgInDoubleSSEReg:
2459                         case ArgInFloatSSEReg:
2460                                 load->dreg = mono_alloc_freg (cfg);
2461                                 break;
2462                         default:
2463                                 g_assert_not_reached ();
2464                         }
2465                         MONO_ADD_INS (cfg->cbb, load);
2466
2467                         add_outarg_reg (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load);
2468                 }
2469                 break;
2470         }
2471         case ArgValuetypeAddrInIReg: {
2472                 MonoInst *vtaddr, *load;
2473                 vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);
2474                 
2475                 MONO_INST_NEW (cfg, load, OP_LDADDR);
2476                 cfg->has_indirection = TRUE;
2477                 load->inst_p0 = vtaddr;
2478                 vtaddr->flags |= MONO_INST_INDIRECT;
2479                 load->type = STACK_MP;
2480                 load->klass = vtaddr->klass;
2481                 load->dreg = mono_alloc_ireg (cfg);
2482                 MONO_ADD_INS (cfg->cbb, load);
2483                 mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 4);
2484
2485                 if (ainfo->pair_storage [0] == ArgInIReg) {
2486                         MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE);
2487                         arg->dreg = mono_alloc_ireg (cfg);
2488                         arg->sreg1 = load->dreg;
2489                         arg->inst_imm = 0;
2490                         MONO_ADD_INS (cfg->cbb, arg);
2491                         mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE);
2492                 } else {
2493                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, load->dreg);
2494                 }
2495                 break;
2496         }
2497         case ArgGSharedVtInReg:
2498                 /* Pass by addr */
2499                 mono_call_inst_add_outarg_reg (cfg, call, src->dreg, ainfo->reg, FALSE);
2500                 break;
2501         case ArgGSharedVtOnStack:
2502                 g_assert_not_reached ();
2503                 break;
2504         default:
2505                 if (size == 8) {
2506                         int dreg = mono_alloc_ireg (cfg);
2507
2508                         MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
2509                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, AMD64_RSP, ainfo->offset, dreg);
2510                 } else if (size <= 40) {
2511                         mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
2512                 } else {
2513                         // FIXME: Code growth
2514                         mini_emit_memcpy (cfg, AMD64_RSP, ainfo->offset, src->dreg, 0, size, 4);
2515                 }
2516
2517                 if (cfg->compute_gc_maps) {
2518                         MonoInst *def;
2519                         EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, ainfo->offset, &ins->klass->byval_arg);
2520                 }
2521         }
2522 }
2523
2524 void
2525 mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
2526 {
2527         MonoType *ret = mini_get_underlying_type (mono_method_signature (method)->ret);
2528
2529         if (ret->type == MONO_TYPE_R4) {
2530                 if (COMPILE_LLVM (cfg))
2531                         MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
2532                 else
2533                         MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
2534                 return;
2535         } else if (ret->type == MONO_TYPE_R8) {
2536                 MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
2537                 return;
2538         }
2539                         
2540         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
2541 }
2542
2543 #endif /* DISABLE_JIT */
2544
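/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch to INS->inst_true_bb. If the target has
 * already been emitted, branch to it directly; otherwise record a patch and
 * emit a short (8 bit) branch when the worst case offset fits into an imm8,
 * falling back to a 32 bit displacement.
 */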
2545 #define EMIT_COND_BRANCH(ins,cond,sign) \
2546         if (ins->inst_true_bb->native_offset) { \
2547                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
2548         } else { \
2549                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
2550                 if ((cfg->opt & MONO_OPT_BRANCH) && \
2551                     x86_is_imm8 (ins->inst_true_bb->max_offset - offset)) \
2552                         x86_branch8 (code, cond, 0, sign); \
2553                 else \
2554                         x86_branch32 (code, cond, 0, sign); \
2555 }
2556
2557 typedef struct {
2558         MonoMethodSignature *sig;
2559         CallInfo *cinfo;
2560 } ArchDynCallInfo;
2561
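/*
 * dyn_call_supported:
 *
 *   Return whether OP_DYN_CALL can be used with SIG/CINFO: only signatures
 * whose return value and arguments are passed in integer registers (or in
 * integer register pairs, for small valuetypes) are supported, and the
 * dynamic call path is disabled entirely on Windows.
 */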
2562 static gboolean
2563 dyn_call_supported (MonoMethodSignature *sig, CallInfo *cinfo)
2564 {
2565         int i;
2566
2567 #ifdef HOST_WIN32
2568         return FALSE;
2569 #endif
2570
2571         switch (cinfo->ret.storage) {
2572         case ArgNone:
2573         case ArgInIReg:
2574                 break;
2575         case ArgValuetypeInReg: {
2576                 ArgInfo *ainfo = &cinfo->ret;
2577
2578                 if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
2579                         return FALSE;
2580                 if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
2581                         return FALSE;
2582                 break;
2583         }
2584         default:
2585                 return FALSE;
2586         }
2587
2588         for (i = 0; i < cinfo->nargs; ++i) {
2589                 ArgInfo *ainfo = &cinfo->args [i];
2590                 switch (ainfo->storage) {
2591                 case ArgInIReg:
2592                         break;
2593                 case ArgValuetypeInReg:
2594                         if (ainfo->pair_storage [0] != ArgNone && ainfo->pair_storage [0] != ArgInIReg)
2595                                 return FALSE;
2596                         if (ainfo->pair_storage [1] != ArgNone && ainfo->pair_storage [1] != ArgInIReg)
2597                                 return FALSE;
2598                         break;
2599                 default:
2600                         return FALSE;
2601                 }
2602         }
2603
2604         return TRUE;
2605 }
2606
2607 /*
2608  * mono_arch_dyn_call_prepare:
2609  *
2610  *   Return a pointer to an arch-specific structure which contains information 
2611  * needed by mono_arch_get_dyn_call_args (). Return NULL if OP_DYN_CALL is not
2612  * supported for SIG.
2613  * This function is equivalent to ffi_prep_cif in libffi.
2614  */
2615 MonoDynCallInfo*
2616 mono_arch_dyn_call_prepare (MonoMethodSignature *sig)
2617 {
2618         ArchDynCallInfo *info;
2619         CallInfo *cinfo;
2620
2621         cinfo = get_call_info (NULL, sig);
2622
2623         if (!dyn_call_supported (sig, cinfo)) {
2624                 g_free (cinfo);
2625                 return NULL;
2626         }
2627
2628         info = g_new0 (ArchDynCallInfo, 1);
2629         // FIXME: Preprocess the info to speed up get_dyn_call_args ().
2630         info->sig = sig;
2631         info->cinfo = cinfo;
2632         
2633         return (MonoDynCallInfo*)info;
2634 }
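
/*
 * A sketch of the expected call sequence (hypothetical caller; the actual
 * OP_DYN_CALL invocation is emitted by the generic dyn call machinery):
 *
 *     MonoDynCallInfo *info = mono_arch_dyn_call_prepare (sig);
 *     if (info) {
 *             mono_arch_start_dyn_call (info, args, ret, buf, buf_len);
 *             ... run the OP_DYN_CALL trampoline on 'buf' ...
 *             mono_arch_finish_dyn_call (info, buf);
 *             mono_arch_dyn_call_free (info);
 *     }
 */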
2635
2636 /*
2637  * mono_arch_dyn_call_free:
2638  *
2639  *   Free a MonoDynCallInfo structure.
2640  */
2641 void
2642 mono_arch_dyn_call_free (MonoDynCallInfo *info)
2643 {
2644         ArchDynCallInfo *ainfo = (ArchDynCallInfo*)info;
2645
2646         g_free (ainfo->cinfo);
2647         g_free (ainfo);
2648 }
2649
2650 #if !defined(__native_client__)
2651 #define PTR_TO_GREG(ptr) (mgreg_t)(ptr)
2652 #define GREG_TO_PTR(greg) (gpointer)(greg)
2653 #else
2654 /* Correctly handle casts to/from 32-bit pointers without compiler warnings */
2655 #define PTR_TO_GREG(ptr) (mgreg_t)(uintptr_t)(ptr)
2656 #define GREG_TO_PTR(greg) (gpointer)(guint32)(greg)
2657 #endif
2658
2659 /*
2660  * mono_arch_start_dyn_call:
2661  *
2662  *   Convert the arguments ARGS to a format which can be passed to OP_DYN_CALL, and
2663  * store the result into BUF.
2664  * ARGS should be an array of pointers pointing to the arguments.
2665  * RET should point to a memory buffer large enough to hold the result of the
2666  * call.
2667  * This function should be as fast as possible, any work which does not depend
2668  * on the actual values of the arguments should be done in 
2669  * mono_arch_dyn_call_prepare ().
2670  * start_dyn_call + OP_DYN_CALL + finish_dyn_call is equivalent to ffi_call in
2671  * libffi.
2672  */
2673 void
2674 mono_arch_start_dyn_call (MonoDynCallInfo *info, gpointer **args, guint8 *ret, guint8 *buf, int buf_len)
2675 {
2676         ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
2677         DynCallArgs *p = (DynCallArgs*)buf;
2678         int arg_index, greg, i, pindex;
2679         MonoMethodSignature *sig = dinfo->sig;
2680
2681         g_assert (buf_len >= sizeof (DynCallArgs));
2682
2683         p->res = 0;
2684         p->ret = ret;
2685
2686         arg_index = 0;
2687         greg = 0;
2688         pindex = 0;
2689
2690         if (sig->hasthis || dinfo->cinfo->vret_arg_index == 1) {
2691                 p->regs [greg ++] = PTR_TO_GREG(*(args [arg_index ++]));
2692                 if (!sig->hasthis)
2693                         pindex = 1;
2694         }
2695
2696         if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg)
2697                 p->regs [greg ++] = PTR_TO_GREG(ret);
2698
2699         for (i = pindex; i < sig->param_count; i++) {
2700                 MonoType *t = mini_get_underlying_type (sig->params [i]);
2701                 gpointer *arg = args [arg_index ++];
2702
2703                 if (t->byref) {
2704                         p->regs [greg ++] = PTR_TO_GREG(*(arg));
2705                         continue;
2706                 }
2707
2708                 switch (t->type) {
2709                 case MONO_TYPE_STRING:
2710                 case MONO_TYPE_CLASS:  
2711                 case MONO_TYPE_ARRAY:
2712                 case MONO_TYPE_SZARRAY:
2713                 case MONO_TYPE_OBJECT:
2714                 case MONO_TYPE_PTR:
2715                 case MONO_TYPE_I:
2716                 case MONO_TYPE_U:
2717 #if !defined(__mono_ilp32__)
2718                 case MONO_TYPE_I8:
2719                 case MONO_TYPE_U8:
2720 #endif
2721                         g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]);
2722                         p->regs [greg ++] = PTR_TO_GREG(*(arg));
2723                         break;
2724 #if defined(__mono_ilp32__)
2725                 case MONO_TYPE_I8:
2726                 case MONO_TYPE_U8:
2727                         g_assert (dinfo->cinfo->args [i + sig->hasthis].reg == param_regs [greg]);
2728                         p->regs [greg ++] = *(guint64*)(arg);
2729                         break;
2730 #endif
2731                 case MONO_TYPE_U1:
2732                         p->regs [greg ++] = *(guint8*)(arg);
2733                         break;
2734                 case MONO_TYPE_I1:
2735                         p->regs [greg ++] = *(gint8*)(arg);
2736                         break;
2737                 case MONO_TYPE_I2:
2738                         p->regs [greg ++] = *(gint16*)(arg);
2739                         break;
2740                 case MONO_TYPE_U2:
2741                         p->regs [greg ++] = *(guint16*)(arg);
2742                         break;
2743                 case MONO_TYPE_I4:
2744                         p->regs [greg ++] = *(gint32*)(arg);
2745                         break;
2746                 case MONO_TYPE_U4:
2747                         p->regs [greg ++] = *(guint32*)(arg);
2748                         break;
2749                 case MONO_TYPE_GENERICINST:
2750                         if (MONO_TYPE_IS_REFERENCE (t)) {
2751                                 p->regs [greg ++] = PTR_TO_GREG(*(arg));
2752                                 break;
2753                         } else {
2754                                 /* Fall through */
2755                         }
2756                 case MONO_TYPE_VALUETYPE: {
2757                         ArgInfo *ainfo = &dinfo->cinfo->args [i + sig->hasthis];
2758
2759                         g_assert (ainfo->storage == ArgValuetypeInReg);
2760                         if (ainfo->pair_storage [0] != ArgNone) {
2761                                 g_assert (ainfo->pair_storage [0] == ArgInIReg);
2762                                 p->regs [greg ++] = ((mgreg_t*)(arg))[0];
2763                         }
2764                         if (ainfo->pair_storage [1] != ArgNone) {
2765                                 g_assert (ainfo->pair_storage [1] == ArgInIReg);
2766                                 p->regs [greg ++] = ((mgreg_t*)(arg))[1];
2767                         }
2768                         break;
2769                 }
2770                 default:
2771                         g_assert_not_reached ();
2772                 }
2773         }
2774
2775         g_assert (greg <= PARAM_REGS);
2776 }
2777
2778 /*
2779  * mono_arch_finish_dyn_call:
2780  *
2781  *   Store the result of a dyn call into the return value buffer passed to
2782  * start_dyn_call ().
2783  * This function should be as fast as possible, any work which does not depend
2784  * on the actual values of the arguments should be done in 
2785  * mono_arch_dyn_call_prepare ().
2786  */
2787 void
2788 mono_arch_finish_dyn_call (MonoDynCallInfo *info, guint8 *buf)
2789 {
2790         ArchDynCallInfo *dinfo = (ArchDynCallInfo*)info;
2791         MonoMethodSignature *sig = dinfo->sig;
2792         guint8 *ret = ((DynCallArgs*)buf)->ret;
2793         mgreg_t res = ((DynCallArgs*)buf)->res;
2794         MonoType *sig_ret = mini_get_underlying_type (sig->ret);
2795
2796         switch (sig_ret->type) {
2797         case MONO_TYPE_VOID:
2798                 *(gpointer*)ret = NULL;
2799                 break;
2800         case MONO_TYPE_STRING:
2801         case MONO_TYPE_CLASS:  
2802         case MONO_TYPE_ARRAY:
2803         case MONO_TYPE_SZARRAY:
2804         case MONO_TYPE_OBJECT:
2805         case MONO_TYPE_I:
2806         case MONO_TYPE_U:
2807         case MONO_TYPE_PTR:
2808                 *(gpointer*)ret = GREG_TO_PTR(res);
2809                 break;
2810         case MONO_TYPE_I1:
2811                 *(gint8*)ret = res;
2812                 break;
2813         case MONO_TYPE_U1:
2814                 *(guint8*)ret = res;
2815                 break;
2816         case MONO_TYPE_I2:
2817                 *(gint16*)ret = res;
2818                 break;
2819         case MONO_TYPE_U2:
2820                 *(guint16*)ret = res;
2821                 break;
2822         case MONO_TYPE_I4:
2823                 *(gint32*)ret = res;
2824                 break;
2825         case MONO_TYPE_U4:
2826                 *(guint32*)ret = res;
2827                 break;
2828         case MONO_TYPE_I8:
2829                 *(gint64*)ret = res;
2830                 break;
2831         case MONO_TYPE_U8:
2832                 *(guint64*)ret = res;
2833                 break;
2834         case MONO_TYPE_GENERICINST:
2835                 if (MONO_TYPE_IS_REFERENCE (sig_ret)) {
2836                         *(gpointer*)ret = GREG_TO_PTR(res);
2837                         break;
2838                 } else {
2839                         /* Fall through */
2840                 }
2841         case MONO_TYPE_VALUETYPE:
2842                 if (dinfo->cinfo->ret.storage == ArgValuetypeAddrInIReg) {
2843                         /* Nothing to do */
2844                 } else {
2845                         ArgInfo *ainfo = &dinfo->cinfo->ret;
2846
2847                         g_assert (ainfo->storage == ArgValuetypeInReg);
2848
2849                         if (ainfo->pair_storage [0] != ArgNone) {
2850                                 g_assert (ainfo->pair_storage [0] == ArgInIReg);
2851                                 ((mgreg_t*)ret)[0] = res;
2852                         }
2853
2854                         g_assert (ainfo->pair_storage [1] == ArgNone);
2855                 }
2856                 break;
2857         default:
2858                 g_assert_not_reached ();
2859         }
2860 }
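
/*
 * Illustrative only: a rough sketch (not part of this file's API) of how a
 * caller drives the three dyn call entry points, mirroring ffi_call. The
 * helper name "dyn_call_sketch" and the elided OP_DYN_CALL step are
 * placeholders.
 */
#if 0
static void
dyn_call_sketch (MonoDynCallInfo *info, gpointer **args, guint8 *ret)
{
	guint8 buf [sizeof (DynCallArgs)];

	/* Pack the argument values into the DynCallArgs register image */
	mono_arch_start_dyn_call (info, args, ret, buf, sizeof (buf));
	/* ... JITted code executes OP_DYN_CALL with BUF, setting p->res ... */
	/* Unpack the raw result into the RET buffer */
	mono_arch_finish_dyn_call (info, buf);
}
#endif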
2861
2862 /* emit an exception if the condition fails */
2863 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
2864         do {                                                        \
2865                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
2866                 if (tins == NULL) {                                                                             \
2867                         mono_add_patch_info (cfg, code - cfg->native_code,   \
2868                                         MONO_PATCH_INFO_EXC, exc_name);  \
2869                         x86_branch32 (code, cond, 0, signed);               \
2870                 } else {        \
2871                         EMIT_COND_BRANCH (tins, cond, signed);  \
2872                 }                       \
2873         } while (0)
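
/*
 * Typical use (sketch): after an overflow-checked ALU op, branch to an
 * OverflowException throw site when the overflow flag is set:
 *
 *     EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
 */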
2874
2875 #define EMIT_FPCOMPARE(code) do { \
2876         amd64_fcompp (code); \
2877         amd64_fnstsw (code); \
2878 } while (0)
2879
2880 #define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
2881         amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
2882         amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
2883         amd64_ ##op (code); \
2884         amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
2885         amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
2886 } while (0)
2887
2888 static guint8*
2889 emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
2890 {
2891         gboolean no_patch = FALSE;
2892
2893         /* 
2894          * FIXME: Add support for thunks
2895          */
2896         {
2897                 gboolean near_call = FALSE;
2898
2899                 /*
2900                  * Indirect calls are expensive, so try to make a near call if possible.
2901                  * The caller's memory is allocated by the code manager, so it is
2902                  * guaranteed to be reachable through a 32 bit displacement.
2903                  */
2904
2905                 if (patch_type != MONO_PATCH_INFO_ABS) {
2906                         /* The target is in memory allocated using the code manager */
2907                         near_call = TRUE;
2908
2909                         if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
2910                                 if (((MonoMethod*)data)->klass->image->aot_module)
2911                                         /* The callee might be an AOT method */
2912                                         near_call = FALSE;
2913                                 if (((MonoMethod*)data)->dynamic)
2914                                         /* The target is in malloc-ed memory */
2915                                         near_call = FALSE;
2916                         }
2917
2918                         if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
2919                                 /* 
2920                                  * The call might go directly to a native function without
2921                                  * the wrapper.
2922                                  */
2923                                 MonoJitICallInfo *mi = mono_find_jit_icall_by_name (data);
2924                                 if (mi) {
2925                                         gconstpointer target = mono_icall_get_wrapper (mi);
2926                                         if ((((guint64)target) >> 32) != 0)
2927                                                 near_call = FALSE;
2928                                 }
2929                         }
2930                 }
2931                 else {
2932                         MonoJumpInfo *jinfo = NULL;
2933
2934                         if (cfg->abs_patches)
2935                                 jinfo = g_hash_table_lookup (cfg->abs_patches, data);
2936                         if (jinfo) {
2937                                 if (jinfo->type == MONO_PATCH_INFO_JIT_ICALL_ADDR) {
2938                                         MonoJitICallInfo *mi = mono_find_jit_icall_by_name (jinfo->data.name);
2939                                         if (mi && (((guint64)mi->func) >> 32) == 0)
2940                                                 near_call = TRUE;
2941                                         no_patch = TRUE;
2942                                 } else {
2943                                         /* 
2944                                          * This is not really an optimization, but required because the
2945                                          * generic class init trampolines use R11 to pass the vtable.
2946                                          */
2947                                         near_call = TRUE;
2948                                 }
2949                         } else {
2950                                 MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
2951                                 if (info) {
2952                                         if (info->func == info->wrapper) {
2953                                                 /* No wrapper */
2954                                                 if ((((guint64)info->func) >> 32) == 0)
2955                                                         near_call = TRUE;
2956                                         }
2957                                         else {
2958                                                 /* See the comment in mono_codegen () */
2959                                                 if ((info->name [0] != 'v') || (strstr (info->name, "ves_array_new_va_") == NULL && strstr (info->name, "ves_array_element_address_") == NULL))
2960                                                         near_call = TRUE;
2961                                         }
2962                                 }
2963                                 else if ((((guint64)data) >> 32) == 0) {
2964                                         near_call = TRUE;
2965                                         no_patch = TRUE;
2966                                 }
2967                         }
2968                 }
2969
2970                 if (cfg->method->dynamic)
2971                         /* These methods are allocated using malloc */
2972                         near_call = FALSE;
2973
2974 #ifdef MONO_ARCH_NOMAP32BIT
2975                 near_call = FALSE;
2976 #endif
2977 #if defined(__native_client__)
2978                 /* Always use near_call == TRUE for Native Client */
2979                 near_call = TRUE;
2980 #endif
2981                 /* The 64bit XEN kernel does not honour the MAP_32BIT flag. (#522894) */
2982                 if (optimize_for_xen)
2983                         near_call = FALSE;
2984
2985                 if (cfg->compile_aot) {
2986                         near_call = TRUE;
2987                         no_patch = TRUE;
2988                 }
2989
2990                 if (near_call) {
2991                         /* 
2992                          * Align the call displacement to an address divisible by 4 so it does
2993                          * not span cache lines. This is required for code patching to work on SMP
2994                          * systems.
2995                          */
2996                         if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0) {
2997                                 guint32 pad_size = 4 - ((guint32)(code + 1 - cfg->native_code) % 4);
2998                                 amd64_padding (code, pad_size);
2999                         }
3000                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
3001                         amd64_call_code (code, 0);
3002                 }
3003                 else {
3004                         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
3005                         amd64_set_reg_template (code, GP_SCRATCH_REG);
3006                         amd64_call_reg (code, GP_SCRATCH_REG);
3007                 }
3008         }
3009
3010         return code;
3011 }
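
/*
 * For reference, the two call shapes emitted above are roughly (sketch,
 * AT&T syntax; the displacement/immediate is filled in later when the
 * patch info is applied):
 *
 *   near call:     call   rel32
 *   indirect call: movabs $imm64, %r11        ; GP_SCRATCH_REG template
 *                  call   *%r11
 */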
3012
3013 static inline guint8*
3014 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data, gboolean win64_adjust_stack)
3015 {
3016 #ifdef TARGET_WIN32
3017         if (win64_adjust_stack)
3018                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 32);
3019 #endif
3020         code = emit_call_body (cfg, code, patch_type, data);
3021 #ifdef TARGET_WIN32
3022         if (win64_adjust_stack)
3023                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 32);
3024 #endif
3025 
3026         return code;
3027 }
3028
3029 static inline int
3030 store_membase_imm_to_store_membase_reg (int opcode)
3031 {
3032         switch (opcode) {
3033         case OP_STORE_MEMBASE_IMM:
3034                 return OP_STORE_MEMBASE_REG;
3035         case OP_STOREI4_MEMBASE_IMM:
3036                 return OP_STOREI4_MEMBASE_REG;
3037         case OP_STOREI8_MEMBASE_IMM:
3038                 return OP_STOREI8_MEMBASE_REG;
3039         }
3040
3041         return -1;
3042 }
3043
3044 #ifndef DISABLE_JIT
3045
3046 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB) || ((opcode) == OP_ISBB_IMM)))
3047
3048 /*
3049  * mono_arch_peephole_pass_1:
3050  *
3051  *   Perform peephole opts which should/can be performed before local regalloc
3052  */
3053 void
3054 mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
3055 {
3056         MonoInst *ins, *n;
3057
3058         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
3059                 MonoInst *last_ins = mono_inst_prev (ins, FILTER_IL_SEQ_POINT);
3060
3061                 switch (ins->opcode) {
3062                 case OP_ADD_IMM:
3063                 case OP_IADD_IMM:
3064                 case OP_LADD_IMM:
3065                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS) && (ins->inst_imm > 0)) {
3066                                 /* 
3067                                  * X86_LEA is like ADD, but doesn't have the
3068                                  * sreg1==dreg restriction. inst_imm > 0 is needed since LEA sign-extends 
3069                                  * its operand to 64 bit.
3070                                  */
3071                                 ins->opcode = OP_X86_LEA_MEMBASE;
3072                                 ins->inst_basereg = ins->sreg1;
3073                         }
3074                         break;
3075                 case OP_LXOR:
3076                 case OP_IXOR:
3077                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
3078                                 MonoInst *ins2;
3079
3080                                 /* 
3081                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
3082                                  * the latter has length 2-3 instead of 6 (reverse constant
3083                                  * propagation). These instruction sequences are very common
3084                                  * in the initlocals bblock.
3085                                  */
3086                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
3087                                         if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
3088                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
3089                                                 ins2->sreg1 = ins->dreg;
3090                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
3091                                                 /* Continue */
3092                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
3093                                                 NULLIFY_INS (ins2);
3094                                                 /* Continue */
3095                                         } else if (ins2->opcode == OP_IL_SEQ_POINT) {
3096                                                 /* Continue */
3097                                         } else {
3098                                                 break;
3099                                         }
3100                                 }
3101                         }
3102                         break;
3103                 case OP_COMPARE_IMM:
3104                 case OP_LCOMPARE_IMM:
3105                         /* OP_COMPARE_IMM (reg, 0) 
3106                          * --> 
3107                          * OP_AMD64_TEST_NULL (reg) 
3108                          */
3109                         if (!ins->inst_imm)
3110                                 ins->opcode = OP_AMD64_TEST_NULL;
3111                         break;
3112                 case OP_ICOMPARE_IMM:
3113                         if (!ins->inst_imm)
3114                                 ins->opcode = OP_X86_TEST_NULL;
3115                         break;
3116                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
3117                         /* 
3118                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
3119                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
3120                          * -->
3121                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
3122                          * OP_COMPARE_IMM reg, imm
3123                          *
3124                          * Note: if imm == 0, OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
3125                          */
3126                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
3127                             ins->inst_basereg == last_ins->inst_destbasereg &&
3128                             ins->inst_offset == last_ins->inst_offset) {
3129                                         ins->opcode = OP_ICOMPARE_IMM;
3130                                         ins->sreg1 = last_ins->sreg1;
3131
3132                                         /* check if we can remove cmp reg,0 with test null */
3133                                         if (!ins->inst_imm)
3134                                                 ins->opcode = OP_X86_TEST_NULL;
3135                                 }
3136
3137                         break;
3138                 }
3139
3140                 mono_peephole_ins (bb, ins);
3141         }
3142 }
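
/*
 * Example of the reverse constant propagation above (sketch):
 *
 *   before:  OP_IXOR                 R <- R ^ R     ; R := 0
 *            OP_STOREI4_MEMBASE_IMM  [basereg + offset] <- 0
 *   after:   OP_IXOR                 R <- R ^ R
 *            OP_STOREI4_MEMBASE_REG  [basereg + offset] <- R
 */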
3143
3144 void
3145 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
3146 {
3147         MonoInst *ins, *n;
3148
3149         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
3150                 switch (ins->opcode) {
3151                 case OP_ICONST:
3152                 case OP_I8CONST: {
3153                         MonoInst *next = mono_inst_next (ins, FILTER_IL_SEQ_POINT);
3154                         /* reg = 0 -> XOR (reg, reg) */
3155                         /* XOR sets cflags, so we can't always do it */
3156                         if (ins->inst_c0 == 0 && (!next || INST_IGNORES_CFLAGS (next->opcode))) {
3157                                 ins->opcode = OP_LXOR;
3158                                 ins->sreg1 = ins->dreg;
3159                                 ins->sreg2 = ins->dreg;
3160                                 /* Fall through */
3161                         } else {
3162                                 break;
3163                         }
3164                 }
3165                 case OP_LXOR:
3166                         /*
3167                          * Use IXOR to avoid a rex prefix if possible. The cpu will sign extend the 
3168                          * 0 result into 64 bits.
3169                          */
3170                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
3171                                 ins->opcode = OP_IXOR;
3172                         }
3173                         /* Fall through */
3174                 case OP_IXOR:
3175                         if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
3176                                 MonoInst *ins2;
3177
3178                                 /* 
3179                                  * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since 
3180                                  * the latter has length 2-3 instead of 6 (reverse constant
3181                                  * propagation). These instruction sequences are very common
3182                                  * in the initlocals bblock.
3183                                  */
3184                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
3185                                         if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
3186                                                 ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
3187                                                 ins2->sreg1 = ins->dreg;
3188                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG) || (ins2->opcode == OP_LIVERANGE_START) || (ins2->opcode == OP_GC_LIVENESS_DEF) || (ins2->opcode == OP_GC_LIVENESS_USE)) {
3189                                                 /* Continue */
3190                                         } else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
3191                                                 NULLIFY_INS (ins2);
3192                                                 /* Continue */
3193                                         } else if (ins2->opcode == OP_IL_SEQ_POINT) {
3194                                                 /* Continue */
3195                                         } else {
3196                                                 break;
3197                                         }
3198                                 }
3199                         }
3200                         break;
3201                 case OP_IADD_IMM:
3202                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
3203                                 ins->opcode = OP_X86_INC_REG;
3204                         break;
3205                 case OP_ISUB_IMM:
3206                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
3207                                 ins->opcode = OP_X86_DEC_REG;
3208                         break;
3209                 }
3210
3211                 mono_peephole_ins (bb, ins);
3212         }
3213 }
3214
3215 #define NEW_INS(cfg,ins,dest,op) do {   \
3216                 MONO_INST_NEW ((cfg), (dest), (op)); \
3217         (dest)->cil_code = (ins)->cil_code; \
3218         mono_bblock_insert_before_ins (bb, ins, (dest)); \
3219         } while (0)
3220
3221 /*
3222  * mono_arch_lowering_pass:
3223  *
3224  *  Converts complex opcodes into simpler ones so that each IR instruction
3225  * corresponds to one machine instruction.
3226  */
3227 void
3228 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
3229 {
3230         MonoInst *ins, *n, *temp;
3231
3232         /*
3233          * FIXME: Need to add more instructions, but the current machine 
3234          * description can't model some parts of the composite instructions like
3235          * cdq.
3236          */
3237         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
3238                 switch (ins->opcode) {
3239                 case OP_DIV_IMM:
3240                 case OP_REM_IMM:
3241                 case OP_IDIV_IMM:
3242                 case OP_IDIV_UN_IMM:
3243                 case OP_IREM_UN_IMM:
3244                 case OP_LREM_IMM:
3245                 case OP_IREM_IMM:
3246                         mono_decompose_op_imm (cfg, bb, ins);
3247                         break;
3248                 case OP_COMPARE_IMM:
3249                 case OP_LCOMPARE_IMM:
3250                         if (!amd64_use_imm32 (ins->inst_imm)) {
3251                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
3252                                 temp->inst_c0 = ins->inst_imm;
3253                                 temp->dreg = mono_alloc_ireg (cfg);
3254                                 ins->opcode = OP_COMPARE;
3255                                 ins->sreg2 = temp->dreg;
3256                         }
3257                         break;
3258 #ifndef __mono_ilp32__
3259                 case OP_LOAD_MEMBASE:
3260 #endif
3261                 case OP_LOADI8_MEMBASE:
3262 #ifndef __native_client_codegen__
3263                 /* Don't generate memindex opcodes (to simplify */
3264                 /* read sandboxing) */
3265                         if (!amd64_use_imm32 (ins->inst_offset)) {
3266                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
3267                                 temp->inst_c0 = ins->inst_offset;
3268                                 temp->dreg = mono_alloc_ireg (cfg);
3269                                 ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
3270                                 ins->inst_indexreg = temp->dreg;
3271                         }
3272 #endif
3273                         break;
3274 #ifndef __mono_ilp32__
3275                 case OP_STORE_MEMBASE_IMM:
3276 #endif
3277                 case OP_STOREI8_MEMBASE_IMM:
3278                         if (!amd64_use_imm32 (ins->inst_imm)) {
3279                                 NEW_INS (cfg, ins, temp, OP_I8CONST);
3280                                 temp->inst_c0 = ins->inst_imm;
3281                                 temp->dreg = mono_alloc_ireg (cfg);
3282                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
3283                                 ins->sreg1 = temp->dreg;
3284                         }
3285                         break;
3286 #ifdef MONO_ARCH_SIMD_INTRINSICS
3287                 case OP_EXPAND_I1: {
3288                                 int temp_reg1 = mono_alloc_ireg (cfg);
3289                                 int temp_reg2 = mono_alloc_ireg (cfg);
3290                                 int original_reg = ins->sreg1;
3291
3292                                 NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
3293                                 temp->sreg1 = original_reg;
3294                                 temp->dreg = temp_reg1;
3295
3296                                 NEW_INS (cfg, ins, temp, OP_SHL_IMM);
3297                                 temp->sreg1 = temp_reg1;
3298                                 temp->dreg = temp_reg2;
3299                                 temp->inst_imm = 8;
3300
3301                                 NEW_INS (cfg, ins, temp, OP_LOR);
3302                                 temp->sreg1 = temp->dreg = temp_reg2;
3303                                 temp->sreg2 = temp_reg1;
3304
3305                                 ins->opcode = OP_EXPAND_I2;
3306                                 ins->sreg1 = temp_reg2;
3307                         }
3308                         break;
3309 #endif
3310                 default:
3311                         break;
3312                 }
3313         }
3314
3315         bb->max_vreg = cfg->next_vreg;
3316 }
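
/*
 * Example lowering (sketch): a compare against an immediate which does not
 * fit into 32 bits is split so the constant is materialized first:
 *
 *   before:  OP_LCOMPARE_IMM  sreg1, 0x123456789
 *   after:   OP_I8CONST       temp <- 0x123456789
 *            OP_COMPARE       sreg1, temp
 */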
3317
3318 static const int 
3319 branch_cc_table [] = {
3320         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
3321         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
3322         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
3323 };
3324
3325 /* Maps CMP_... constants to X86_CC_... constants */
3326 static const int
3327 cc_table [] = {
3328         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
3329         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
3330 };
3331
3332 static const int
3333 cc_signed_table [] = {
3334         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
3335         FALSE, FALSE, FALSE, FALSE
3336 };
3337
3338 /*#include "cprop.c"*/
3339
3340 static unsigned char*
3341 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
3342 {
3343         if (size == 8)
3344                 amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);
3345         else
3346                 amd64_sse_cvttsd2si_reg_reg_size (code, dreg, sreg, 4);
3347
3348         if (size == 1)
3349                 amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
3350         else if (size == 2)
3351                 amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
3352         return code;
3353 }
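
/*
 * Note: cvttsd2si truncates toward zero (e.g. -1.7 converts to -1), and for
 * sizes below 4 the widen above sign- or zero-extends the low byte/word so
 * DREG holds a properly extended value.
 */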
3354
3355 static unsigned char*
3356 mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree)
3357 {
3358         int sreg = tree->sreg1;
3359         int need_touch = FALSE;
3360
3361 #if defined(TARGET_WIN32)
3362         need_touch = TRUE;
3363 #elif defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3364         if (!(tree->flags & MONO_INST_INIT))
3365                 need_touch = TRUE;
3366 #endif
3367
3368         if (need_touch) {
3369                 guint8* br[5];
3370
3371                 /*
3372                  * Under Windows:
3373          * If the requested stack size is larger than one page,
3374          * perform a stack-touch operation.
3375                  */
3376                 /*
3377                  * Generate stack probe code.
3378                  * Under Windows, it is necessary to allocate one page at a time,
3379                  * "touching" stack after each successful sub-allocation. This is
3380                  * because of the way stack growth is implemented - there is a
3381                  * guard page before the lowest stack page that is currently committed.
3382                  * Stack normally grows sequentially so OS traps access to the
3383                  * guard page and commits more pages when needed.
3384                  */
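                /*
                 * Sketch of the emitted probe loop (roughly, AT&T syntax):
                 *
                 *         test  $~0xFFF, sreg     # less than one page?
                 *         jz    small
                 * loop:   sub   $0x1000, %rsp
                 *         test  %rsp, (%rsp)      # touch the newly exposed page
                 *         sub   $0x1000, sreg
                 *         cmp   $0x1000, sreg
                 *         jae   loop
                 *         test  sreg, sreg
                 *         jz    done
                 *         sub   sreg, %rsp        # sub-page remainder
                 *         jmp   done
                 * small:  sub   sreg, %rsp
                 * done:
                 */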
3385                 amd64_test_reg_imm (code, sreg, ~0xFFF);
3386                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
3387
3388                 br[2] = code; /* loop */
3389                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
3390                 amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
3391                 amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
3392                 amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
3393                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
3394                 amd64_patch (br[3], br[2]);
3395                 amd64_test_reg_reg (code, sreg, sreg);
3396                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
3397                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
3398
3399                 br[1] = code; x86_jump8 (code, 0);
3400
3401                 amd64_patch (br[0], code);
3402                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
3403                 amd64_patch (br[1], code);
3404                 amd64_patch (br[4], code);
3405         }
3406         else
3407                 amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);
3408
3409         if (tree->flags & MONO_INST_INIT) {
3410                 int offset = 0;
3411                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
3412                         amd64_push_reg (code, AMD64_RAX);
3413                         offset += 8;
3414                 }
3415                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
3416                         amd64_push_reg (code, AMD64_RCX);
3417                         offset += 8;
3418                 }
3419                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
3420                         amd64_push_reg (code, AMD64_RDI);
3421                         offset += 8;
3422                 }
3423                 
3424                 amd64_shift_reg_imm (code, X86_SHR, sreg, 3);
3425                 if (sreg != AMD64_RCX)
3426                         amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
3427                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
3428                                 
3429                 amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
3430                 if (cfg->param_area)
3431                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RDI, cfg->param_area);
3432                 amd64_cld (code);
3433 #if defined(__default_codegen__)
3434                 amd64_prefix (code, X86_REP_PREFIX);
3435                 amd64_stosl (code);
3436 #elif defined(__native_client_codegen__)
3437                 /* NaCl stos pseudo-instruction */
3438                 amd64_codegen_pre(code);
3439                 /* First, clear the upper 32 bits of RDI (mov %edi, %edi)  */
3440                 amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RDI, 4);
3441                 /* Add %r15 to %rdi using lea, condition flags unaffected. */
3442                 amd64_lea_memindex_size (code, AMD64_RDI, AMD64_R15, 0, AMD64_RDI, 0, 8);
3443                 amd64_prefix (code, X86_REP_PREFIX);
3444                 amd64_stosl (code);
3445                 amd64_codegen_post(code);
3446 #endif /* __native_client_codegen__ */
3447                 
3448                 if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
3449                         amd64_pop_reg (code, AMD64_RDI);
3450                 if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
3451                         amd64_pop_reg (code, AMD64_RCX);
3452                 if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
3453                         amd64_pop_reg (code, AMD64_RAX);
3454         }
3455         return code;
3456 }
3457
3458 static guint8*
3459 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
3460 {
3461         CallInfo *cinfo;
3462         guint32 quad;
3463
3464         /* Move return value to the target register */
3465         /* FIXME: do this in the local reg allocator */
3466         switch (ins->opcode) {
3467         case OP_CALL:
3468         case OP_CALL_REG:
3469         case OP_CALL_MEMBASE:
3470         case OP_LCALL:
3471         case OP_LCALL_REG:
3472         case OP_LCALL_MEMBASE:
3473                 g_assert (ins->dreg == AMD64_RAX);
3474                 break;
3475         case OP_FCALL:
3476         case OP_FCALL_REG:
3477         case OP_FCALL_MEMBASE: {
3478                 MonoType *rtype = mini_get_underlying_type (((MonoCallInst*)ins)->signature->ret);
3479                 if (rtype->type == MONO_TYPE_R4) {
3480                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
3481                 }
3482                 else {
3483                         if (ins->dreg != AMD64_XMM0)
3484                                 amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
3485                 }
3486                 break;
3487         }
3488         case OP_RCALL:
3489         case OP_RCALL_REG:
3490         case OP_RCALL_MEMBASE:
3491                 if (ins->dreg != AMD64_XMM0)
3492                         amd64_sse_movss_reg_reg (code, ins->dreg, AMD64_XMM0);
3493                 break;
3494         case OP_VCALL:
3495         case OP_VCALL_REG:
3496         case OP_VCALL_MEMBASE:
3497         case OP_VCALL2:
3498         case OP_VCALL2_REG:
3499         case OP_VCALL2_MEMBASE:
3500                 cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature);
3501                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3502                         MonoInst *loc = cfg->arch.vret_addr_loc;
3503
3504                         /* Load the destination address */
3505                         g_assert (loc->opcode == OP_REGOFFSET);
3506                         amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, sizeof(gpointer));
3507
3508                         for (quad = 0; quad < 2; quad ++) {
3509                                 switch (cinfo->ret.pair_storage [quad]) {
3510                                 case ArgInIReg:
3511                                         amd64_mov_membase_reg (code, AMD64_RCX, (quad * sizeof(mgreg_t)), cinfo->ret.pair_regs [quad], sizeof(mgreg_t));
3512                                         break;
3513                                 case ArgInFloatSSEReg:
3514                                         amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
3515                                         break;
3516                                 case ArgInDoubleSSEReg:
3517                                         amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
3518                                         break;
3519                                 case ArgNone:
3520                                         break;
3521                                 default:
3522                                         NOT_IMPLEMENTED;
3523                                 }
3524                         }
3525                 }
3526                 break;
3527         }
3528
3529         return code;
3530 }
3531
3532 #endif /* DISABLE_JIT */
3533
3534 #ifdef __APPLE__
3535 static int tls_gs_offset;
3536 #endif
3537
3538 gboolean
3539 mono_amd64_have_tls_get (void)
3540 {
3541 #ifdef TARGET_MACH
3542         static gboolean have_tls_get = FALSE;
3543         static gboolean inited = FALSE;
3544
3545         if (inited)
3546                 return have_tls_get;
3547
3548 #if MONO_HAVE_FAST_TLS
3549         guint8 *ins = (guint8*)pthread_getspecific;
3550
3551         /*
3552          * We're looking for these two instructions:
3553          *
3554          * mov    %gs:[offset](,%rdi,8),%rax
3555          * retq
3556          */
3557         have_tls_get = ins [0] == 0x65 &&
3558                        ins [1] == 0x48 &&
3559                        ins [2] == 0x8b &&
3560                        ins [3] == 0x04 &&
3561                        ins [4] == 0xfd &&
3562                        ins [6] == 0x00 &&
3563                        ins [7] == 0x00 &&
3564                        ins [8] == 0x00 &&
3565                        ins [9] == 0xc3;
3566
3567         tls_gs_offset = ins[5];
3568 #endif
3569
3570         inited = TRUE;
3571
3572         return have_tls_get;
3573 #elif defined(TARGET_ANDROID)
3574         return FALSE;
3575 #else
3576         return TRUE;
3577 #endif
3578 }
3579
3580 int
3581 mono_amd64_get_tls_gs_offset (void)
3582 {
3583 #ifdef TARGET_OSX
3584         return tls_gs_offset;
3585 #else
3586         g_assert_not_reached ();
3587         return -1;
3588 #endif
3589 }
3590
3591 /*
3592  * mono_amd64_emit_tls_get:
3593  * @code: buffer to store code to
3594  * @dreg: hard register in which to place the result
3595  * @tls_offset: the TLS offset of the slot to read
3596  *
3597  * mono_amd64_emit_tls_get emits in @code the native code that puts in
3598  * the dreg register the item in the thread local storage identified
3599  * by tls_offset.
3600  *
3601  * Returns: a pointer to the end of the stored code
3602  */
3603 guint8*
3604 mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
3605 {
3606 #ifdef TARGET_WIN32
3607         if (tls_offset < 64) {
3608                 x86_prefix (code, X86_GS_PREFIX);
3609                 amd64_mov_reg_mem (code, dreg, (tls_offset * 8) + 0x1480, 8);
3610         } else {
3611                 guint8 *buf [16];
3612
3613                 g_assert (tls_offset < 0x440);
3614                 /* Load TEB->TlsExpansionSlots */
3615                 x86_prefix (code, X86_GS_PREFIX);
3616                 amd64_mov_reg_mem (code, dreg, 0x1780, 8);
3617                 amd64_test_reg_reg (code, dreg, dreg);
3618                 buf [0] = code;
3619                 amd64_branch (code, X86_CC_EQ, code, TRUE);
3620                 amd64_mov_reg_membase (code, dreg, dreg, (tls_offset * 8) - 0x200, 8);
3621                 amd64_patch (buf [0], code);
3622         }
3623 #elif defined(__APPLE__)
3624         x86_prefix (code, X86_GS_PREFIX);
3625         amd64_mov_reg_mem (code, dreg, tls_gs_offset + (tls_offset * 8), 8);
3626 #else
3627         if (optimize_for_xen) {
3628                 x86_prefix (code, X86_FS_PREFIX);
3629                 amd64_mov_reg_mem (code, dreg, 0, 8);
3630                 amd64_mov_reg_membase (code, dreg, dreg, tls_offset, 8);
3631         } else {
3632                 x86_prefix (code, X86_FS_PREFIX);
3633                 amd64_mov_reg_mem (code, dreg, tls_offset, 8);
3634         }
3635 #endif
3636         return code;
3637 }
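
/*
 * For example, on Linux with a direct (non-Xen) TLS model the sequence above
 * reduces to a single instruction (sketch):
 *
 *     mov  %fs:tls_offset, %dreg
 */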
3638
3639 static guint8*
3640 emit_tls_get_reg (guint8* code, int dreg, int offset_reg)
3641 {
3642         /* offset_reg contains a value translated by mono_arch_translate_tls_offset () */
3643 #ifdef TARGET_OSX
3644         if (dreg != offset_reg)
3645                 amd64_mov_reg_reg (code, dreg, offset_reg, sizeof (mgreg_t));
3646         amd64_prefix (code, X86_GS_PREFIX);
3647         amd64_mov_reg_membase (code, dreg, dreg, 0, sizeof (mgreg_t));
3648 #elif defined(__linux__)
3649         int tmpreg = -1;
3650
3651         if (dreg == offset_reg) {
3652                 /* Use a temporary reg by saving it to the redzone */
3653                 tmpreg = dreg == AMD64_RAX ? AMD64_RCX : AMD64_RAX;
3654                 amd64_mov_membase_reg (code, AMD64_RSP, -8, tmpreg, 8);
3655                 amd64_mov_reg_reg (code, tmpreg, offset_reg, sizeof (gpointer));
3656                 offset_reg = tmpreg;
3657         }
3658         x86_prefix (code, X86_FS_PREFIX);
3659         amd64_mov_reg_mem (code, dreg, 0, 8);
3660         amd64_mov_reg_memindex (code, dreg, dreg, 0, offset_reg, 0, 8);
3661         if (tmpreg != -1)
3662                 amd64_mov_reg_membase (code, tmpreg, AMD64_RSP, -8, 8);
3663 #else
3664         g_assert_not_reached ();
3665 #endif
3666         return code;
3667 }
3668
3669 static guint8*
3670 amd64_emit_tls_set (guint8 *code, int sreg, int tls_offset)
3671 {
3672 #ifdef TARGET_WIN32
3673         g_assert_not_reached ();
3674 #elif defined(__APPLE__)
3675         x86_prefix (code, X86_GS_PREFIX);
3676         amd64_mov_mem_reg (code, tls_gs_offset + (tls_offset * 8), sreg, 8);
3677 #else
3678         g_assert (!optimize_for_xen);
3679         x86_prefix (code, X86_FS_PREFIX);
3680         amd64_mov_mem_reg (code, tls_offset, sreg, 8);
3681 #endif
3682         return code;
3683 }
3684
3685 static guint8*
3686 amd64_emit_tls_set_reg (guint8 *code, int sreg, int offset_reg)
3687 {
3688         /* offset_reg contains a value translated by mono_arch_translate_tls_offset () */
3689 #ifdef TARGET_WIN32
3690         g_assert_not_reached ();
3691 #elif defined(__APPLE__)
3692         x86_prefix (code, X86_GS_PREFIX);
3693         amd64_mov_membase_reg (code, offset_reg, 0, sreg, 8);
3694 #else
3695         x86_prefix (code, X86_FS_PREFIX);
3696         amd64_mov_membase_reg (code, offset_reg, 0, sreg, 8);
3697 #endif
3698         return code;
3699 }
3700 
3701 /*
3702  * mono_arch_translate_tls_offset:
3703  *
3704  *   Translate the TLS offset OFFSET computed by MONO_THREAD_VAR_OFFSET () into a format usable by OP_TLS_GET_REG/OP_TLS_SET_REG.
3705  */
3706 int
3707 mono_arch_translate_tls_offset (int offset)
3708 {
3709 #ifdef __APPLE__
3710         return tls_gs_offset + (offset * 8);
3711 #else
3712         return offset;
3713 #endif
3714 }
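
/*
 * E.g. on OSX a pthread key K translates to tls_gs_offset + K * 8, the byte
 * offset of that key's slot from %gs; elsewhere the offset is used as-is.
 */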
3715
3716 /*
3717  * emit_setup_lmf:
3718  *
3719  *   Emit code to initialize an LMF structure at LMF_OFFSET.
3720  */
3721 static guint8*
3722 emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset, int cfa_offset)
3723 {
3724         /* 
3725          * The ip field is not set, the exception handling code will obtain it from the stack location pointed to by the sp field.
3726          */
3727         /* 
3728          * sp is saved right before calls but we need to save it here too so
3729          * async stack walks would work.
3730          */
3731         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
3732         /* Save rbp */
3733         amd64_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), AMD64_RBP, 8);
3734         if (cfg->arch.omit_fp && cfa_offset != -1)
3735                 mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - (cfa_offset - (lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp))));
3736
3737         /* These can't contain refs */
3738         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, previous_lmf), SLOT_NOREF);
3739         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rip), SLOT_NOREF);
3740         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), SLOT_NOREF);
3741         /* These are handled automatically by the stack marking code */
3742         mini_gc_set_slot_type_from_fp (cfg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), SLOT_NOREF);
3743
3744         return code;
3745 }
3746
3747 #define REAL_PRINT_REG(text,reg) \
3748 mono_assert (reg >= 0); \
3749 amd64_push_reg (code, AMD64_RAX); \
3750 amd64_push_reg (code, AMD64_RDX); \
3751 amd64_push_reg (code, AMD64_RCX); \
3752 amd64_push_reg (code, reg); \
3753 amd64_push_imm (code, reg); \
3754 amd64_push_imm (code, text " %d %p\n"); \
3755 amd64_mov_reg_imm (code, AMD64_RAX, printf); \
3756 amd64_call_reg (code, AMD64_RAX); \
3757 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*8); /* pop the three 8-byte pushes */ \
3758 amd64_pop_reg (code, AMD64_RCX); \
3759 amd64_pop_reg (code, AMD64_RDX); \
3760 amd64_pop_reg (code, AMD64_RAX);
3761
3762 /* benchmark and set based on cpu */
3763 #define LOOP_ALIGNMENT 8
3764 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
3765
3766 #ifndef DISABLE_JIT
3767 void
3768 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3769 {
3770         MonoInst *ins;
3771         MonoCallInst *call;
3772         guint offset;
3773         guint8 *code = cfg->native_code + cfg->code_len;
3774         int max_len;
3775
3776         /* Fix max_offset estimate for each successor bb */
3777         if (cfg->opt & MONO_OPT_BRANCH) {
3778                 int current_offset = cfg->code_len;
3779                 MonoBasicBlock *current_bb;
3780                 for (current_bb = bb; current_bb != NULL; current_bb = current_bb->next_bb) {
3781                         current_bb->max_offset = current_offset;
3782                         current_offset += current_bb->max_length;
3783                 }
3784         }
3785
3786         if (cfg->opt & MONO_OPT_LOOP) {
3787                 int pad, align = LOOP_ALIGNMENT;
3788                 /* set alignment depending on cpu */
3789                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
3790                         pad = align - pad;
3791                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
3792                         amd64_padding (code, pad);
3793                         cfg->code_len += pad;
3794                         bb->native_offset = cfg->code_len;
3795                 }
3796         }
3797
3798 #if defined(__native_client_codegen__)
3799         /* For Native Client, all indirect call/jump targets must be */
3800         /* 32-byte aligned.  Exception handler blocks are jumped to  */
3801         /* indirectly as well.                                       */
3802         gboolean bb_needs_alignment = (bb->flags & BB_INDIRECT_JUMP_TARGET) ||
3803                                       (bb->flags & BB_EXCEPTION_HANDLER);
3804
3805         if ( bb_needs_alignment && ((cfg->code_len & kNaClAlignmentMask) != 0)) {
3806                 int pad = kNaClAlignment - (cfg->code_len & kNaClAlignmentMask);
3807                 if (pad != kNaClAlignment) code = mono_arch_nacl_pad(code, pad);
3808                 cfg->code_len += pad;
3809                 bb->native_offset = cfg->code_len;
3810         }
3811 #endif  /*__native_client_codegen__*/
3812
3813         if (cfg->verbose_level > 2)
3814                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
3815
3816         if ((cfg->prof_options & MONO_PROFILE_COVERAGE) && cfg->coverage_info) {
3817                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
3818                 g_assert (!cfg->compile_aot);
3819
3820                 cov->data [bb->dfn].cil_code = bb->cil_code;
3821                 amd64_mov_reg_imm (code, AMD64_R11, (guint64)&cov->data [bb->dfn].count);
3822                 /* this is not thread safe, but good enough */
3823                 amd64_inc_membase (code, AMD64_R11, 0);
3824         }
3825
3826         offset = code - cfg->native_code;
3827
3828         mono_debug_open_block (cfg, bb, offset);
3829
3830         if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
3831                 x86_breakpoint (code);
3832
3833         MONO_BB_FOR_EACH_INS (bb, ins) {
3834                 offset = code - cfg->native_code;
3835
3836                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3837
3838 #define EXTRA_CODE_SPACE (NACL_SIZE (16, 16 + kNaClAlignment))
3839
3840                 if (G_UNLIKELY (offset > (cfg->code_size - max_len - EXTRA_CODE_SPACE))) {
3841                         cfg->code_size *= 2;
3842                         cfg->native_code = mono_realloc_native_code(cfg);
3843                         code = cfg->native_code + offset;
3844                         cfg->stat_code_reallocs++;
3845                 }
3846
3847                 if (cfg->debug_info)
3848                         mono_debug_record_line_number (cfg, ins, offset);
3849
3850                 switch (ins->opcode) {
3851                 case OP_BIGMUL:
3852                         amd64_mul_reg (code, ins->sreg2, TRUE);
3853                         break;
3854                 case OP_BIGMUL_UN:
3855                         amd64_mul_reg (code, ins->sreg2, FALSE);
3856                         break;
3857                 case OP_X86_SETEQ_MEMBASE:
3858                         amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
3859                         break;
3860                 case OP_STOREI1_MEMBASE_IMM:
3861                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
3862                         break;
3863                 case OP_STOREI2_MEMBASE_IMM:
3864                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
3865                         break;
3866                 case OP_STOREI4_MEMBASE_IMM:
3867                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
3868                         break;
3869                 case OP_STOREI1_MEMBASE_REG:
3870                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
3871                         break;
3872                 case OP_STOREI2_MEMBASE_REG:
3873                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
3874                         break;
3875                 /* In AMD64 NaCl, pointers are 4 bytes, */
3876                 /*  so STORE_* != STOREI8_*. Likewise below. */
3877                 case OP_STORE_MEMBASE_REG:
3878                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, sizeof(gpointer));
3879                         break;
3880                 case OP_STOREI8_MEMBASE_REG:
3881                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
3882                         break;
3883                 case OP_STOREI4_MEMBASE_REG:
3884                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
3885                         break;
3886                 case OP_STORE_MEMBASE_IMM:
3887 #ifndef __native_client_codegen__
3888                         /* In NaCl, this could be a PCONST type, which could */
3889                         /* mean a pointer type was copied directly into the  */
3890                         /* lower 32-bits of inst_imm, so for InvalidPtr==-1  */
3891                         /* the value would be 0x00000000FFFFFFFF which is    */
3892                         /* not proper for an imm32 unless you cast it.       */
3893                         g_assert (amd64_is_imm32 (ins->inst_imm));
3894 #endif
3895                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, (gint32)ins->inst_imm, sizeof(gpointer));
3896                         break;
3897                 case OP_STOREI8_MEMBASE_IMM:
3898                         g_assert (amd64_is_imm32 (ins->inst_imm));
3899                         amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
3900                         break;
3901                 case OP_LOAD_MEM:
3902 #ifdef __mono_ilp32__
3903                         /* In ILP32, pointers are 4 bytes, so separate these */
3904                         /* cases, use literal 8 below where we really want 8 */
3905                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3906                         amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, sizeof(gpointer));
3907                         break;
3908 #endif
3909                 case OP_LOADI8_MEM:
3910                         // FIXME: Decompose this earlier
3911                         if (amd64_use_imm32 (ins->inst_imm))
3912                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 8);
3913                         else {
3914                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_imm, sizeof(gpointer));
3915                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8);
3916                         }
3917                         break;
3918                 case OP_LOADI4_MEM:
3919                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3920                         amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0);
3921                         break;
3922                 case OP_LOADU4_MEM:
3923                         // FIXME: Decompose this earlier
3924                         if (amd64_use_imm32 (ins->inst_imm))
3925                                 amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
3926                         else {
3927                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_imm, sizeof(gpointer));
3928                                 amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
3929                         }
3930                         break;
3931                 case OP_LOADU1_MEM:
3932                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3933                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE);
3934                         break;
3935                 case OP_LOADU2_MEM:
3936                         /* Load an unsigned 16 bit value from an absolute address and zero-extend it */
3938                         amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
3939                         amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE);
3940                         break;
3941                 case OP_LOAD_MEMBASE:
3942                         g_assert (amd64_is_imm32 (ins->inst_offset));
3943                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof(gpointer));
3944                         break;
3945                 case OP_LOADI8_MEMBASE:
3946                         /* Use a literal 8 rather than sizeof (gpointer) or  */
3947                         /* register size: this opcode always loads 8 bytes   */
3948                         g_assert (amd64_is_imm32 (ins->inst_offset));
3949                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 8);
3950                         break;
3951                 case OP_LOADI4_MEMBASE:
3952                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
3953                         break;
3954                 case OP_LOADU4_MEMBASE:
3955                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
3956                         break;
3957                 case OP_LOADU1_MEMBASE:
3958                         /* The cpu zero extends the result into 64 bits */
3959                         amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE, 4);
3960                         break;
3961                 case OP_LOADI1_MEMBASE:
3962                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
3963                         break;
3964                 case OP_LOADU2_MEMBASE:
3965                         /* The cpu zero extends the result into 64 bits */
3966                         amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE, 4);
3967                         break;
3968                 case OP_LOADI2_MEMBASE:
3969                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
3970                         break;
3971                 case OP_AMD64_LOADI8_MEMINDEX:
3972                         amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
3973                         break;
3974                 case OP_LCONV_TO_I1:
3975                 case OP_ICONV_TO_I1:
3976                 case OP_SEXT_I1:
3977                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3978                         break;
3979                 case OP_LCONV_TO_I2:
3980                 case OP_ICONV_TO_I2:
3981                 case OP_SEXT_I2:
3982                         amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3983                         break;
3984                 case OP_LCONV_TO_U1:
3985                 case OP_ICONV_TO_U1:
3986                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
3987                         break;
3988                 case OP_LCONV_TO_U2:
3989                 case OP_ICONV_TO_U2:
3990                         amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
3991                         break;
3992                 case OP_ZEXT_I4:
3993                         /* Clean out the upper 32 bits: a 4 byte mov implicitly zero-extends to 64 bits */
3994                         amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
3995                         break;
3996                 case OP_SEXT_I4:
3997                         amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
3998                         break;
3999                 case OP_COMPARE:
4000                 case OP_LCOMPARE:
4001                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
4002                         break;
4003                 case OP_COMPARE_IMM:
4004 #if defined(__mono_ilp32__)
4005                         /* Comparison of pointer immediates should be 4 bytes to avoid sign-extend problems */
4006                         g_assert (amd64_is_imm32 (ins->inst_imm));
4007                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
4008                         break;
4009 #endif
4010                 case OP_LCOMPARE_IMM:
4011                         g_assert (amd64_is_imm32 (ins->inst_imm));
4012                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
4013                         break;
4014                 case OP_X86_COMPARE_REG_MEMBASE:
4015                         amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
4016                         break;
4017                 case OP_X86_TEST_NULL:
4018                         amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
4019                         break;
4020                 case OP_AMD64_TEST_NULL:
4021                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
4022                         break;
4023
4024                 case OP_X86_ADD_REG_MEMBASE:
4025                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4026                         break;
4027                 case OP_X86_SUB_REG_MEMBASE:
4028                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4029                         break;
4030                 case OP_X86_AND_REG_MEMBASE:
4031                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4032                         break;
4033                 case OP_X86_OR_REG_MEMBASE:
4034                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4035                         break;
4036                 case OP_X86_XOR_REG_MEMBASE:
4037                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4038                         break;
4039
4040                 case OP_X86_ADD_MEMBASE_IMM:
4041                         /* FIXME: Make a 64 version too */
4042                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4043                         break;
4044                 case OP_X86_SUB_MEMBASE_IMM:
4045                         g_assert (amd64_is_imm32 (ins->inst_imm));
4046                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4047                         break;
4048                 case OP_X86_AND_MEMBASE_IMM:
4049                         g_assert (amd64_is_imm32 (ins->inst_imm));
4050                         amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4051                         break;
4052                 case OP_X86_OR_MEMBASE_IMM:
4053                         g_assert (amd64_is_imm32 (ins->inst_imm));
4054                         amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4055                         break;
4056                 case OP_X86_XOR_MEMBASE_IMM:
4057                         g_assert (amd64_is_imm32 (ins->inst_imm));
4058                         amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4059                         break;
4060                 case OP_X86_ADD_MEMBASE_REG:
4061                         amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
4062                         break;
4063                 case OP_X86_SUB_MEMBASE_REG:
4064                         amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
4065                         break;
4066                 case OP_X86_AND_MEMBASE_REG:
4067                         amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
4068                         break;
4069                 case OP_X86_OR_MEMBASE_REG:
4070                         amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
4071                         break;
4072                 case OP_X86_XOR_MEMBASE_REG:
4073                         amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
4074                         break;
4075                 case OP_X86_INC_MEMBASE:
4076                         amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
4077                         break;
4078                 case OP_X86_INC_REG:
4079                         amd64_inc_reg_size (code, ins->dreg, 4);
4080                         break;
4081                 case OP_X86_DEC_MEMBASE:
4082                         amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
4083                         break;
4084                 case OP_X86_DEC_REG:
4085                         amd64_dec_reg_size (code, ins->dreg, 4);
4086                         break;
4087                 case OP_X86_MUL_REG_MEMBASE:
4088                 case OP_X86_MUL_MEMBASE_REG:
4089                         amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4090                         break;
4091                 case OP_AMD64_ICOMPARE_MEMBASE_REG:
4092                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
4093                         break;
4094                 case OP_AMD64_ICOMPARE_MEMBASE_IMM:
4095                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4096                         break;
4097                 case OP_AMD64_COMPARE_MEMBASE_REG:
4098                         amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
4099                         break;
4100                 case OP_AMD64_COMPARE_MEMBASE_IMM:
4101                         g_assert (amd64_is_imm32 (ins->inst_imm));
4102                         amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
4103                         break;
4104                 case OP_X86_COMPARE_MEMBASE8_IMM:
4105                         amd64_alu_membase8_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
4106                         break;
4107                 case OP_AMD64_ICOMPARE_REG_MEMBASE:
4108                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
4109                         break;
4110                 case OP_AMD64_COMPARE_REG_MEMBASE:
4111                         amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
4112                         break;
4113
4114                 case OP_AMD64_ADD_REG_MEMBASE:
4115                         amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
4116                         break;
4117                 case OP_AMD64_SUB_REG_MEMBASE:
4118                         amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
4119                         break;
4120                 case OP_AMD64_AND_REG_MEMBASE:
4121                         amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
4122                         break;
4123                 case OP_AMD64_OR_REG_MEMBASE:
4124                         amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
4125                         break;
4126                 case OP_AMD64_XOR_REG_MEMBASE:
4127                         amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
4128                         break;
4129
4130                 case OP_AMD64_ADD_MEMBASE_REG:
4131                         amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
4132                         break;
4133                 case OP_AMD64_SUB_MEMBASE_REG:
4134                         amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
4135                         break;
4136                 case OP_AMD64_AND_MEMBASE_REG:
4137                         amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
4138                         break;
4139                 case OP_AMD64_OR_MEMBASE_REG:
4140                         amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
4141                         break;
4142                 case OP_AMD64_XOR_MEMBASE_REG:
4143                         amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
4144                         break;
4145
4146                 case OP_AMD64_ADD_MEMBASE_IMM:
4147                         g_assert (amd64_is_imm32 (ins->inst_imm));
4148                         amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
4149                         break;
4150                 case OP_AMD64_SUB_MEMBASE_IMM:
4151                         g_assert (amd64_is_imm32 (ins->inst_imm));
4152                         amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
4153                         break;
4154                 case OP_AMD64_AND_MEMBASE_IMM:
4155                         g_assert (amd64_is_imm32 (ins->inst_imm));
4156                         amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
4157                         break;
4158                 case OP_AMD64_OR_MEMBASE_IMM:
4159                         g_assert (amd64_is_imm32 (ins->inst_imm));
4160                         amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
4161                         break;
4162                 case OP_AMD64_XOR_MEMBASE_IMM:
4163                         g_assert (amd64_is_imm32 (ins->inst_imm));
4164                         amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
4165                         break;
4166
4167                 case OP_BREAK:
4168                         amd64_breakpoint (code);
4169                         break;
4170                 case OP_RELAXED_NOP:
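                             /* "rep; nop" encodes the PAUSE hint: architecturally a nop, but it tells the cpu this is a spin-wait loop */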
4171                         x86_prefix (code, X86_REP_PREFIX);
4172                         x86_nop (code);
4173                         break;
4174                 case OP_HARD_NOP:
4175                         x86_nop (code);
4176                         break;
4177                 case OP_NOP:
4178                 case OP_DUMMY_USE:
4179                 case OP_DUMMY_STORE:
4180                 case OP_DUMMY_ICONST:
4181                 case OP_DUMMY_R8CONST:
4182                 case OP_NOT_REACHED:
4183                 case OP_NOT_NULL:
4184                         break;
4185                 case OP_IL_SEQ_POINT:
4186                         mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
4187                         break;
4188                 case OP_SEQ_POINT: {
4189                         int i;
4190
4191                         if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
4192                                 if (cfg->compile_aot) {
4193                                         MonoInst *var = cfg->arch.ss_tramp_var;
4194                                         guint8 *label;
4195
4196                                         /* Load ss_tramp_var */
4197                                         amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
4198                                         /* Load the trampoline address */
4199                                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 8);
4200                                         /* Call it if it is non-null */
4201                                         amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
4202                                         label = code;
4203                                         amd64_branch8 (code, X86_CC_Z, 0, FALSE);
4204                                         amd64_call_reg (code, AMD64_R11);
4205                                         amd64_patch (label, code);
4206                                 } else {
4207                                         /* 
4208                                          * Read from the single stepping trigger page. This will cause a
4209                                          * SIGSEGV when single stepping is enabled.
4210                                          * We do this _before_ the breakpoint, so single stepping after
4211                                          * a breakpoint is hit will step to the next IL offset.
4212                                          */
4213                                         MonoInst *var = cfg->arch.ss_trigger_page_var;
4214
4215                                         amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
4216                                         amd64_alu_membase_imm_size (code, X86_CMP, AMD64_R11, 0, 0, 4);
4217                                 }
4218                         }
4219
4220                         /*
4221                          * This is the address which is saved in seq points.
4222                          */
4223                         mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
4224
4225                         if (cfg->compile_aot) {
4226                                 guint32 offset = code - cfg->native_code;
4227                                 guint32 val;
4228                                 MonoInst *info_var = cfg->arch.seq_point_info_var;
4229                                 guint8 *label;
4230
4231                                 /* Load info var */
4232                                 amd64_mov_reg_membase (code, AMD64_R11, info_var->inst_basereg, info_var->inst_offset, 8);
4233                                 val = ((offset) * sizeof (guint8*)) + MONO_STRUCT_OFFSET (SeqPointInfo, bp_addrs);
4234                                 /* Load the info->bp_addrs [offset], which is either NULL or the address of the breakpoint trampoline */
4235                                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, val, 8);
4236                                 amd64_test_reg_reg (code, AMD64_R11, AMD64_R11);
4237                                 label = code;
4238                                 amd64_branch8 (code, X86_CC_Z, 0, FALSE);
4239                                 /* Call the trampoline */
4240                                 amd64_call_reg (code, AMD64_R11);
4241                                 amd64_patch (label, code);
4242                         } else {
4243                                 /* 
4244                                  * A placeholder for a possible breakpoint inserted by
4245                                  * mono_arch_set_breakpoint ().
4246                                  */
4247                                 for (i = 0; i < breakpoint_size; ++i)
4248                                         x86_nop (code);
4249                         }
4250                         /*
4251                          * Add an additional nop so skipping the bp doesn't cause the ip to point
4252                          * to another IL offset.
4253                          */
4254                         x86_nop (code);
4255                         break;
4256                 }
4257                 case OP_ADDCC:
4258                 case OP_LADDCC:
4259                 case OP_LADD:
4260                         amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
4261                         break;
4262                 case OP_ADC:
4263                         amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
4264                         break;
4265                 case OP_ADD_IMM:
4266                 case OP_LADD_IMM:
4267                         g_assert (amd64_is_imm32 (ins->inst_imm));
4268                         amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
4269                         break;
4270                 case OP_ADC_IMM:
4271                         g_assert (amd64_is_imm32 (ins->inst_imm));
4272                         amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
4273                         break;
4274                 case OP_SUBCC:
4275                 case OP_LSUBCC:
4276                 case OP_LSUB:
4277                         amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
4278                         break;
4279                 case OP_SBB:
4280                         amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
4281                         break;
4282                 case OP_SUB_IMM:
4283                 case OP_LSUB_IMM:
4284                         g_assert (amd64_is_imm32 (ins->inst_imm));
4285                         amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
4286                         break;
4287                 case OP_SBB_IMM:
4288                         g_assert (amd64_is_imm32 (ins->inst_imm));
4289                         amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
4290                         break;
4291                 case OP_LAND:
4292                         amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
4293                         break;
4294                 case OP_AND_IMM:
4295                 case OP_LAND_IMM:
4296                         g_assert (amd64_is_imm32 (ins->inst_imm));
4297                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
4298                         break;
4299                 case OP_LMUL:
4300                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
4301                         break;
4302                 case OP_MUL_IMM:
4303                 case OP_LMUL_IMM:
4304                 case OP_IMUL_IMM: {
4305                         guint32 size = (ins->opcode == OP_IMUL_IMM) ? 4 : 8;
4306                         
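                             /* Strength-reduce multiplication by small constants to LEA/ADD/SHL sequences, which are cheaper than imul */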
4307                         switch (ins->inst_imm) {
4308                         case 2:
4309                                 /* MOV r1, r2 */
4310                                 /* ADD r1, r1 */
4311                                 if (ins->dreg != ins->sreg1)
4312                                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, size);
4313                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
4314                                 break;
4315                         case 3:
4316                                 /* LEA r1, [r2 + r2*2] */
4317                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
4318                                 break;
4319                         case 5:
4320                                 /* LEA r1, [r2 + r2*4] */
4321                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
4322                                 break;
4323                         case 6:
4324                                 /* LEA r1, [r2 + r2*2] */
4325                                 /* ADD r1, r1          */
4326                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
4327                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
4328                                 break;
4329                         case 9:
4330                                 /* LEA r1, [r2 + r2*8] */
4331                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
4332                                 break;
4333                         case 10:
4334                                 /* LEA r1, [r2 + r2*4] */
4335                                 /* ADD r1, r1          */
4336                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
4337                                 amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
4338                                 break;
4339                         case 12:
4340                                 /* LEA r1, [r2 + r2*2] */
4341                                 /* SHL r1, 2           */
4342                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
4343                                 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
4344                                 break;
4345                         case 25:
4346                                 /* LEA r1, [r2 + r2*4] */
4347                                 /* LEA r1, [r1 + r1*4] */
4348                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
4349                                 amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
4350                                 break;
4351                         case 100:
4352                                 /* LEA r1, [r2 + r2*4] */
4353                                 /* SHL r1, 2           */
4354                                 /* LEA r1, [r1 + r1*4] */
4355                                 amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
4356                                 amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
4357                                 amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
4358                                 break;
4359                         default:
4360                                 amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, size);
4361                                 break;
4362                         }
4363                         break;
4364                 }
4365                 case OP_LDIV:
4366                 case OP_LREM:
4367 #if defined( __native_client_codegen__ )
4368                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0);
4369                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, TRUE, "DivideByZeroException");
4370 #endif
4371                         /* Regalloc magic makes the div/rem cases the same: idiv leaves the quotient in RAX and the remainder in RDX, and dreg was pinned to the right one */
4372                         if (ins->sreg2 == AMD64_RDX) {
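                                     /* cdq clobbers RDX, so a divisor living in RDX is first spilled just below RSP (the SysV red zone) */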
4373                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
4374                                 amd64_cdq (code);
4375                                 amd64_div_membase (code, AMD64_RSP, -8, TRUE);
4376                         } else {
4377                                 amd64_cdq (code);
4378                                 amd64_div_reg (code, ins->sreg2, TRUE);
4379                         }
4380                         break;
4381                 case OP_LDIV_UN:
4382                 case OP_LREM_UN:
4383 #if defined( __native_client_codegen__ )
4384                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0);
4385                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, TRUE, "DivideByZeroException");
4386 #endif
4387                         if (ins->sreg2 == AMD64_RDX) {
4388                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
4389                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
4390                                 amd64_div_membase (code, AMD64_RSP, -8, FALSE);
4391                         } else {
4392                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
4393                                 amd64_div_reg (code, ins->sreg2, FALSE);
4394                         }
4395                         break;
4396                 case OP_IDIV:
4397                 case OP_IREM:
4398 #if defined( __native_client_codegen__ )
4399                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg2, 0);
4400                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, TRUE, "DivideByZeroException");
4401 #endif
4402                         if (ins->sreg2 == AMD64_RDX) {
4403                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
4404                                 amd64_cdq_size (code, 4);
4405                                 amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
4406                         } else {
4407                                 amd64_cdq_size (code, 4);
4408                                 amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
4409                         }
4410                         break;
4411                 case OP_IDIV_UN:
4412                 case OP_IREM_UN:
4413 #if defined( __native_client_codegen__ )
4414                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg2, 0, 4);
4415                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, TRUE, "DivideByZeroException");
4416 #endif
4417                         if (ins->sreg2 == AMD64_RDX) {
4418                                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
4419                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
4420                                 amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
4421                         } else {
4422                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
4423                                 amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
4424                         }
4425                         break;
4426                 case OP_LMUL_OVF:
4427                         amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
4428                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
4429                         break;
4430                 case OP_LOR:
4431                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4432                         break;
4433                 case OP_OR_IMM:
4434                 case OP_LOR_IMM:
4435                         g_assert (amd64_is_imm32 (ins->inst_imm));
4436                         amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
4437                         break;
4438                 case OP_LXOR:
4439                         amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
4440                         break;
4441                 case OP_XOR_IMM:
4442                 case OP_LXOR_IMM:
4443                         g_assert (amd64_is_imm32 (ins->inst_imm));
4444                         amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
4445                         break;
4446                 case OP_LSHL:
4447                         g_assert (ins->sreg2 == AMD64_RCX);
4448                         amd64_shift_reg (code, X86_SHL, ins->dreg);
4449                         break;
4450                 case OP_LSHR:
4451                         g_assert (ins->sreg2 == AMD64_RCX);
4452                         amd64_shift_reg (code, X86_SAR, ins->dreg);
4453                         break;
4454                 case OP_SHR_IMM:
4455                 case OP_LSHR_IMM:
4456                         g_assert (amd64_is_imm32 (ins->inst_imm));
4457                         amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
4458                         break;
4459                 case OP_SHR_UN_IMM:
4460                         g_assert (amd64_is_imm32 (ins->inst_imm));
4461                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
4462                         break;
4463                 case OP_LSHR_UN_IMM:
4464                         g_assert (amd64_is_imm32 (ins->inst_imm));
4465                         amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
4466                         break;
4467                 case OP_LSHR_UN:
4468                         g_assert (ins->sreg2 == AMD64_RCX);
4469                         amd64_shift_reg (code, X86_SHR, ins->dreg);
4470                         break;
4471                 case OP_SHL_IMM:
4472                 case OP_LSHL_IMM:
4473                         g_assert (amd64_is_imm32 (ins->inst_imm));
4474                         amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
4475                         break;
4476
4477                 case OP_IADDCC:
4478                 case OP_IADD:
4479                         amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
4480                         break;
4481                 case OP_IADC:
4482                         amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
4483                         break;
4484                 case OP_IADD_IMM:
4485                         amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
4486                         break;
4487                 case OP_IADC_IMM:
4488                         amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
4489                         break;
4490                 case OP_ISUBCC:
4491                 case OP_ISUB:
4492                         amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
4493                         break;
4494                 case OP_ISBB:
4495                         amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
4496                         break;
4497                 case OP_ISUB_IMM:
4498                         amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
4499                         break;
4500                 case OP_ISBB_IMM:
4501                         amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
4502                         break;
4503                 case OP_IAND:
4504                         amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
4505                         break;
4506                 case OP_IAND_IMM:
4507                         amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
4508                         break;
4509                 case OP_IOR:
4510                         amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
4511                         break;
4512                 case OP_IOR_IMM:
4513                         amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
4514                         break;
4515                 case OP_IXOR:
4516                         amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
4517                         break;
4518                 case OP_IXOR_IMM:
4519                         amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
4520                         break;
4521                 case OP_INEG:
4522                         amd64_neg_reg_size (code, ins->sreg1, 4);
4523                         break;
4524                 case OP_INOT:
4525                         amd64_not_reg_size (code, ins->sreg1, 4);
4526                         break;
4527                 case OP_ISHL:
4528                         g_assert (ins->sreg2 == AMD64_RCX);
4529                         amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
4530                         break;
4531                 case OP_ISHR:
4532                         g_assert (ins->sreg2 == AMD64_RCX);
4533                         amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
4534                         break;
4535                 case OP_ISHR_IMM:
4536                         amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
4537                         break;
4538                 case OP_ISHR_UN_IMM:
4539                         amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
4540                         break;
4541                 case OP_ISHR_UN:
4542                         g_assert (ins->sreg2 == AMD64_RCX);
4543                         amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
4544                         break;
4545                 case OP_ISHL_IMM:
4546                         amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
4547                         break;
4548                 case OP_IMUL:
4549                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
4550                         break;
4551                 case OP_IMUL_OVF:
4552                         amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
4553                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
4554                         break;
4555                 case OP_IMUL_OVF_UN:
4556                 case OP_LMUL_OVF_UN: {
4557                         /* FIXME: the mul operation and the overflow check should probably be split into separate opcodes */
4558                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
4559                         int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
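                             /* Unsigned MUL writes the full product to RDX:RAX and sets OF/CF when the high half is non-zero, which is what the overflow check below tests */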
4560                         /*g_assert (ins->sreg2 == X86_EAX);
4561                         g_assert (ins->dreg == X86_EAX);*/
4562                         if (ins->sreg2 == X86_EAX) {
4563                                 non_eax_reg = ins->sreg1;
4564                         } else if (ins->sreg1 == X86_EAX) {
4565                                 non_eax_reg = ins->sreg2;
4566                         } else {
4567                                 /* no need to save since we're going to store to it anyway */
4568                                 if (ins->dreg != X86_EAX) {
4569                                         saved_eax = TRUE;
4570                                         amd64_push_reg (code, X86_EAX);
4571                                 }
4572                                 amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
4573                                 non_eax_reg = ins->sreg2;
4574                         }
4575                         if (ins->dreg == X86_EDX) {
4576                                 if (!saved_eax) {
4577                                         saved_eax = TRUE;
4578                                         amd64_push_reg (code, X86_EAX);
4579                                 }
4580                         } else {
4581                                 saved_edx = TRUE;
4582                                 amd64_push_reg (code, X86_EDX);
4583                         }
4584                         amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
4585                         /* save before the check since pop and mov don't change the flags */
4586                         if (ins->dreg != X86_EAX)
4587                                 amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
4588                         if (saved_edx)
4589                                 amd64_pop_reg (code, X86_EDX);
4590                         if (saved_eax)
4591                                 amd64_pop_reg (code, X86_EAX);
4592                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
4593                         break;
4594                 }
4595                 case OP_ICOMPARE:
4596                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
4597                         break;
4598                 case OP_ICOMPARE_IMM:
4599                         amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
4600                         break;
4601                 case OP_IBEQ:
4602                 case OP_IBLT:
4603                 case OP_IBGT:
4604                 case OP_IBGE:
4605                 case OP_IBLE:
4606                 case OP_LBEQ:
4607                 case OP_LBLT:
4608                 case OP_LBGT:
4609                 case OP_LBGE:
4610                 case OP_LBLE:
4611                 case OP_IBNE_UN:
4612                 case OP_IBLT_UN:
4613                 case OP_IBGT_UN:
4614                 case OP_IBGE_UN:
4615                 case OP_IBLE_UN:
4616                 case OP_LBNE_UN:
4617                 case OP_LBLT_UN:
4618                 case OP_LBGT_UN:
4619                 case OP_LBGE_UN:
4620                 case OP_LBLE_UN:
4621                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
4622                         break;
4623
4624                 case OP_CMOV_IEQ:
4625                 case OP_CMOV_IGE:
4626                 case OP_CMOV_IGT:
4627                 case OP_CMOV_ILE:
4628                 case OP_CMOV_ILT:
4629                 case OP_CMOV_INE_UN:
4630                 case OP_CMOV_IGE_UN:
4631                 case OP_CMOV_IGT_UN:
4632                 case OP_CMOV_ILE_UN:
4633                 case OP_CMOV_ILT_UN:
4634                 case OP_CMOV_LEQ:
4635                 case OP_CMOV_LGE:
4636                 case OP_CMOV_LGT:
4637                 case OP_CMOV_LLE:
4638                 case OP_CMOV_LLT:
4639                 case OP_CMOV_LNE_UN:
4640                 case OP_CMOV_LGE_UN:
4641                 case OP_CMOV_LGT_UN:
4642                 case OP_CMOV_LLE_UN:
4643                 case OP_CMOV_LLT_UN:
4644                         g_assert (ins->dreg == ins->sreg1);
4645                         /* This needs to operate on 64 bit values */
4646                         amd64_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
4647                         break;
4648
4649                 case OP_LNOT:
4650                         amd64_not_reg (code, ins->sreg1);
4651                         break;
4652                 case OP_LNEG:
4653                         amd64_neg_reg (code, ins->sreg1);
4654                         break;
4655
4656                 case OP_ICONST:
4657                 case OP_I8CONST:
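                             /* Constants whose upper 32 bits are zero can use the shorter 4 byte encoding, which zero-extends; the debug option forces the full 8 byte form */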
4658                         if ((((guint64)ins->inst_c0) >> 32) == 0 && !mini_get_debug_options()->single_imm_size)
4659                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
4660                         else
4661                                 amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
4662                         break;
4663                 case OP_AOTCONST:
4664                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
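                             /* A RIP-relative load with a 0 displacement; the patch registered above is resolved later so the load reaches the slot holding the constant */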
4665                         amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, sizeof(gpointer));
4666                         break;
4667                 case OP_JUMP_TABLE:
4668                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
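                             /* Reserve a mov of a 64 bit 0 immediate; patching later replaces it with the address of the jump table */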
4669                         amd64_mov_reg_imm_size (code, ins->dreg, 0, 8);
4670                         break;
4671                 case OP_MOVE:
4672                         if (ins->dreg != ins->sreg1)
4673                                 amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof(mgreg_t));
4674                         break;
4675                 case OP_AMD64_SET_XMMREG_R4: {
4676                         if (cfg->r4fp) {
4677                                 if (ins->dreg != ins->sreg1)
4678                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
4679                         } else {
4680                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
4681                         }
4682                         break;
4683                 }
4684                 case OP_AMD64_SET_XMMREG_R8: {
4685                         if (ins->dreg != ins->sreg1)
4686                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
4687                         break;
4688                 }
4689                 case OP_TAILCALL: {
4690                         MonoCallInst *call = (MonoCallInst*)ins;
4691                         int i, save_area_offset;
4692
4693                         g_assert (!cfg->method->save_lmf);
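                             /* A tail call is an epilogue (restore callee saved regs, free the frame) followed by a jmp to the target instead of a ret */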
4694
4695                         /* Restore callee saved registers */
4696                         save_area_offset = cfg->arch.reg_save_area_offset;
4697                         for (i = 0; i < AMD64_NREG; ++i)
4698                                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
4699                                         amd64_mov_reg_membase (code, i, cfg->frame_reg, save_area_offset, 8);
4700                                         save_area_offset += 8;
4701                                 }
4702
4703                         if (cfg->arch.omit_fp) {
4704                                 if (cfg->arch.stack_alloc_size)
4705                                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
4706                                 // FIXME:
4707                                 if (call->stack_usage)
4708                                         NOT_IMPLEMENTED;
4709                         } else {
4710                                 /* Copy arguments on the stack to our argument area */
4711                                 for (i = 0; i < call->stack_usage; i += sizeof(mgreg_t)) {
4712                                         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, i, sizeof(mgreg_t));
4713                                         amd64_mov_membase_reg (code, AMD64_RBP, 16 + i, AMD64_RAX, sizeof(mgreg_t));
4714                                 }
4715
4716                                 amd64_leave (code);
4717                         }
4718
4719                         offset = code - cfg->native_code;
4720                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, call->method);
4721                         if (cfg->compile_aot)
4722                                 amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
4723                         else
4724                                 amd64_set_reg_template (code, AMD64_R11);
4725                         amd64_jump_reg (code, AMD64_R11);
4726                         ins->flags |= MONO_INST_GC_CALLSITE;
4727                         ins->backend.pc_offset = code - cfg->native_code;
4728                         break;
4729                 }
4730                 case OP_CHECK_THIS:
4731                         /* ensure ins->sreg1 is not NULL: the cmp dereferences it and faults if it is */
4732                         amd64_alu_membase_imm_size (code, X86_CMP, ins->sreg1, 0, 0, 4);
4733                         break;
4734                 case OP_ARGLIST: {
4735                         amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie);
4736                         amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, sizeof(gpointer));
4737                         break;
4738                 }
4739                 case OP_CALL:
4740                 case OP_FCALL:
4741                 case OP_RCALL:
4742                 case OP_LCALL:
4743                 case OP_VCALL:
4744                 case OP_VCALL2:
4745                 case OP_VOIDCALL:
4746                         call = (MonoCallInst*)ins;
4747                         /*
4748                          * The AMD64 ABI forces callers to know about varargs: %al must hold an upper bound on the number of SSE registers used for the arguments.
4749                          */
4750                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
4751                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4752                         else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
4753                                 /* 
4754                                  * Since the unmanaged calling convention doesn't contain a 
4755                                  * 'vararg' entry, we have to treat every pinvoke call as a
4756                                  * potential vararg call.
4757                                  */
4758                                 guint32 nregs, i;
4759                                 nregs = 0;
4760                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
4761                                         if (call->used_fregs & (1 << i))
4762                                                 nregs ++;
4763                                 if (!nregs)
4764                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4765                                 else
4766                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
4767                         }
4768
4769                         if (ins->flags & MONO_INST_HAS_METHOD)
4770                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE);
4771                         else
4772                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE);
4773                         ins->flags |= MONO_INST_GC_CALLSITE;
4774                         ins->backend.pc_offset = code - cfg->native_code;
4775                         code = emit_move_return_value (cfg, ins, code);
4776                         break;
4777                 case OP_FCALL_REG:
4778                 case OP_RCALL_REG:
4779                 case OP_LCALL_REG:
4780                 case OP_VCALL_REG:
4781                 case OP_VCALL2_REG:
4782                 case OP_VOIDCALL_REG:
4783                 case OP_CALL_REG:
4784                         call = (MonoCallInst*)ins;
4785
4786                         if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
4787                                 amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
4788                                 ins->sreg1 = AMD64_R11;
4789                         }
4790
4791                         /*
4792                          * The AMD64 ABI forces callers to know about varargs: %al must hold an upper bound on the number of SSE registers used for the arguments.
4793                          */
4794                         if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
4795                                 if (ins->sreg1 == AMD64_RAX) {
4796                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
4797                                         ins->sreg1 = AMD64_R11;
4798                                 }
4799                                 amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4800                         } else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
4801                                 /* 
4802                                  * Since the unmanaged calling convention doesn't contain a 
4803                                  * 'vararg' entry, we have to treat every pinvoke call as a
4804                                  * potential vararg call.
4805                                  */
4806                                 guint32 nregs, i;
4807                                 nregs = 0;
4808                                 for (i = 0; i < AMD64_XMM_NREG; ++i)
4809                                         if (call->used_fregs & (1 << i))
4810                                                 nregs ++;
4811                                 if (ins->sreg1 == AMD64_RAX) {
4812                                         amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
4813                                         ins->sreg1 = AMD64_R11;
4814                                 }
4815                                 if (!nregs)
4816                                         amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
4817                                 else
4818                                         amd64_mov_reg_imm (code, AMD64_RAX, nregs);
4819                         }
4820
4821                         amd64_call_reg (code, ins->sreg1);
4822                         ins->flags |= MONO_INST_GC_CALLSITE;
4823                         ins->backend.pc_offset = code - cfg->native_code;
4824                         code = emit_move_return_value (cfg, ins, code);
4825                         break;
4826                 case OP_FCALL_MEMBASE:
4827                 case OP_RCALL_MEMBASE:
4828                 case OP_LCALL_MEMBASE:
4829                 case OP_VCALL_MEMBASE:
4830                 case OP_VCALL2_MEMBASE:
4831                 case OP_VOIDCALL_MEMBASE:
4832                 case OP_CALL_MEMBASE:
4833                         call = (MonoCallInst*)ins;
4834
4835                         amd64_call_membase (code, ins->sreg1, ins->inst_offset);
4836                         ins->flags |= MONO_INST_GC_CALLSITE;
4837                         ins->backend.pc_offset = code - cfg->native_code;
4838                         code = emit_move_return_value (cfg, ins, code);
4839                         break;
4840                 case OP_DYN_CALL: {
4841                         int i;
4842                         MonoInst *var = cfg->dyn_call_var;
4843
4844                         g_assert (var->opcode == OP_REGOFFSET);
4845
4846                         /* r11 = args buffer filled by mono_arch_get_dyn_call_args () */
4847                         amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
4848                         /* r10 = ftn */
4849                         amd64_mov_reg_reg (code, AMD64_R10, ins->sreg2, 8);
4850
4851                         /* Save args buffer */
4852                         amd64_mov_membase_reg (code, var->inst_basereg, var->inst_offset, AMD64_R11, 8);
4853
4854                         /* Set argument registers */
4855                         for (i = 0; i < PARAM_REGS; ++i)
4856                                 amd64_mov_reg_membase (code, param_regs [i], AMD64_R11, i * sizeof(mgreg_t), sizeof(mgreg_t));
4857                         
4858                         /* Make the call */
4859                         amd64_call_reg (code, AMD64_R10);
4860
4861                         ins->flags |= MONO_INST_GC_CALLSITE;
4862                         ins->backend.pc_offset = code - cfg->native_code;
4863
4864                         /* Save result */
4865                         amd64_mov_reg_membase (code, AMD64_R11, var->inst_basereg, var->inst_offset, 8);
4866                         amd64_mov_membase_reg (code, AMD64_R11, MONO_STRUCT_OFFSET (DynCallArgs, res), AMD64_RAX, 8);
4867                         break;
4868                 }
4869                 case OP_AMD64_SAVE_SP_TO_LMF: {
4870                         MonoInst *lmf_var = cfg->lmf_var;
4871                         amd64_mov_membase_reg (code, lmf_var->inst_basereg, lmf_var->inst_offset + MONO_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
4872                         break;
4873                 }
4874                 case OP_X86_PUSH:
4875                         g_assert_not_reached ();
4876                         amd64_push_reg (code, ins->sreg1);
4877                         break;
4878                 case OP_X86_PUSH_IMM:
4879                         g_assert_not_reached ();
4880                         g_assert (amd64_is_imm32 (ins->inst_imm));
4881                         amd64_push_imm (code, ins->inst_imm);
4882                         break;
4883                 case OP_X86_PUSH_MEMBASE:
4884                         g_assert_not_reached ();
4885                         amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
4886                         break;
4887                 case OP_X86_PUSH_OBJ: {
4888                         int size = ALIGN_TO (ins->inst_imm, 8);
4889
4890                         g_assert_not_reached ();
4891
4892                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
4893                         amd64_push_reg (code, AMD64_RDI);
4894                         amd64_push_reg (code, AMD64_RSI);
4895                         amd64_push_reg (code, AMD64_RCX);
4896                         if (ins->inst_offset)
4897                                 amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
4898                         else
4899                                 amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
4900                         amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8));
4901                         amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3));
4902                         amd64_cld (code);
4903                         amd64_prefix (code, X86_REP_PREFIX);
4904                         amd64_movsd (code);
4905                         amd64_pop_reg (code, AMD64_RCX);
4906                         amd64_pop_reg (code, AMD64_RSI);
4907                         amd64_pop_reg (code, AMD64_RDI);
4908                         break;
4909                 }
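                     /*
                      * OP_GENERIC_CLASS_INIT: inline fast path for vtable initialization.
                      * Test the 'initialized' bit of the MonoVTable passed in ARG_REG1 and
                      * skip the mono_generic_class_init call when it is already set; the byte
                      * offset and bitmask of the bitfield are looked up once and cached.
                      */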
4910                 case OP_GENERIC_CLASS_INIT: {
4911                         static int byte_offset = -1;
4912                         static guint8 bitmask;
4913                         guint8 *jump;
4914
4915                         g_assert (ins->sreg1 == MONO_AMD64_ARG_REG1);
4916
4917                         if (byte_offset < 0)
4918                                 mono_marshal_find_bitfield_offset (MonoVTable, initialized, &byte_offset, &bitmask);
4919
4920                         amd64_test_membase_imm_size (code, ins->sreg1, byte_offset, bitmask, 1);
4921                         jump = code;
4922                         amd64_branch8 (code, X86_CC_NZ, -1, 1);
4923
4924                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_generic_class_init", FALSE);
4925                         ins->flags |= MONO_INST_GC_CALLSITE;
4926                         ins->backend.pc_offset = code - cfg->native_code;
4927
4928                         x86_patch (jump, code);
4929                         break;
4930                 }
4931
4932                 case OP_X86_LEA:
4933                         amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
4934                         break;
4935                 case OP_X86_LEA_MEMBASE:
4936                         amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
4937                         break;
4938                 case OP_X86_XCHG:
4939                         amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
4940                         break;
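                     /*
                      * OP_LOCALLOC: round the requested size up to the stack frame alignment
                      * with the usual add-then-mask trick: (size + (A - 1)) & ~(A - 1).
                      * E.g. with A = 16, a request of 24 bytes becomes (24 + 15) & ~15 = 32.
                      */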
4941                 case OP_LOCALLOC:
4942                         /* keep alignment */
4943                         amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
4944                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
4945                         code = mono_emit_stack_alloc (cfg, code, ins);
4946                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
4947                         if (cfg->param_area)
4948                                 amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
4949                         break;
4950                 case OP_LOCALLOC_IMM: {
4951                         guint32 size = ins->inst_imm;
4952                         size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
4953
4954                         if (ins->flags & MONO_INST_INIT) {
4955                                 if (size < 64) {
4956                                         int i;
4957
4958                                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
4959                                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
4960
4961                                         for (i = 0; i < size; i += 8)
4962                                                 amd64_mov_membase_reg (code, AMD64_RSP, i, ins->dreg, 8);
4963                                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);                                      
4964                                 } else {
4965                                         amd64_mov_reg_imm (code, ins->dreg, size);
4966                                         ins->sreg1 = ins->dreg;
4967
4968                                         code = mono_emit_stack_alloc (cfg, code, ins);
4969                                         amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
4970                                 }
4971                         } else {
4972                                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
4973                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
4974                         }
4975                         if (cfg->param_area)
4976                                 amd64_alu_reg_imm (code, X86_ADD, ins->dreg, cfg->param_area);
4977                         break;
4978                 }
4979                 case OP_THROW: {
4980                         amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
4981                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4982                                              (gpointer)"mono_arch_throw_exception", FALSE);
4983                         ins->flags |= MONO_INST_GC_CALLSITE;
4984                         ins->backend.pc_offset = code - cfg->native_code;
4985                         break;
4986                 }
4987                 case OP_RETHROW: {
4988                         amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
4989                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
4990                                              (gpointer)"mono_arch_rethrow_exception", FALSE);
4991                         ins->flags |= MONO_INST_GC_CALLSITE;
4992                         ins->backend.pc_offset = code - cfg->native_code;
4993                         break;
4994                 }
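                     /*
                      * OP_CALL_HANDLER: a finally/filter handler is entered with a call so
                      * that OP_ENDFINALLY/OP_ENDFILTER can leave it with a plain ret. The
                      * call instruction itself pushes 8 bytes, so RSP is dropped by another
                      * 8 bytes first to keep the stack 16-byte aligned inside the handler.
                      */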
4995                 case OP_CALL_HANDLER: 
4996                         /* Align stack */
4997                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
4998                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
4999                         amd64_call_imm (code, 0);
5000                         mono_cfg_add_try_hole (cfg, ins->inst_eh_block, code, bb);
5001                         /* Restore stack alignment */
5002                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
5003                         break;
5004                 case OP_START_HANDLER: {
5005                         /* Even though we're saving RSP, use sizeof (gpointer), */
5006                         /* because spvar is of type IntPtr; */
5007                         /* see: mono_create_spvar_for_region */
5008                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
5009                         amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, sizeof(gpointer));
5010
5011                         if ((MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FINALLY) ||
5012                                  MONO_BBLOCK_IS_IN_REGION (bb, MONO_REGION_FILTER)) &&
5013                                 cfg->param_area) {
5014                                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
5015                         }
5016                         break;
5017                 }
5018                 case OP_ENDFINALLY: {
5019                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
5020                         amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof(gpointer));
5021                         amd64_ret (code);
5022                         break;
5023                 }
5024                 case OP_ENDFILTER: {
5025                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
5026                         amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, sizeof(gpointer));
5027                         /* The local allocator will put the result into RAX */
5028                         amd64_ret (code);
5029                         break;
5030                 }
5031                 case OP_GET_EX_OBJ:
5032                         if (ins->dreg != AMD64_RAX)
5033                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, sizeof (gpointer));
5034                         break;
5035                 case OP_LABEL:
5036                         ins->inst_c0 = code - cfg->native_code;
5037                         break;
5038                 case OP_BR:
5039                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
5040                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
5041                         //break;
5042                         if (ins->inst_target_bb->native_offset) {
5043                                 amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
5044                         } else {
5045                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
5046                                 if ((cfg->opt & MONO_OPT_BRANCH) &&
5047                                     x86_is_imm8 (ins->inst_target_bb->max_offset - offset))
5048                                         x86_jump8 (code, 0);
5049                                 else
5050                                         x86_jump32 (code, 0);
5051                         }
5052                         break;
5053                 case OP_BR_REG:
5054                         amd64_jump_reg (code, ins->sreg1);
5055                         break;
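                     /*
                      * The compare opcodes below materialize a condition into dreg: set<cc>
                      * only writes the low byte of the destination, so the result is widened
                      * afterwards to clear the upper bits of the register.
                      */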
5056                 case OP_ICNEQ:
5057                 case OP_ICGE:
5058                 case OP_ICLE:
5059                 case OP_ICGE_UN:
5060                 case OP_ICLE_UN:
5061
5062                 case OP_CEQ:
5063                 case OP_LCEQ:
5064                 case OP_ICEQ:
5065                 case OP_CLT:
5066                 case OP_LCLT:
5067                 case OP_ICLT:
5068                 case OP_CGT:
5069                 case OP_ICGT:
5070                 case OP_LCGT:
5071                 case OP_CLT_UN:
5072                 case OP_LCLT_UN:
5073                 case OP_ICLT_UN:
5074                 case OP_CGT_UN:
5075                 case OP_LCGT_UN:
5076                 case OP_ICGT_UN:
5077                         amd64_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
5078                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
5079                         break;
5080                 case OP_COND_EXC_EQ:
5081                 case OP_COND_EXC_NE_UN:
5082                 case OP_COND_EXC_LT:
5083                 case OP_COND_EXC_LT_UN:
5084                 case OP_COND_EXC_GT:
5085                 case OP_COND_EXC_GT_UN:
5086                 case OP_COND_EXC_GE:
5087                 case OP_COND_EXC_GE_UN:
5088                 case OP_COND_EXC_LE:
5089                 case OP_COND_EXC_LE_UN:
5090                 case OP_COND_EXC_IEQ:
5091                 case OP_COND_EXC_INE_UN:
5092                 case OP_COND_EXC_ILT:
5093                 case OP_COND_EXC_ILT_UN:
5094                 case OP_COND_EXC_IGT:
5095                 case OP_COND_EXC_IGT_UN:
5096                 case OP_COND_EXC_IGE:
5097                 case OP_COND_EXC_IGE_UN:
5098                 case OP_COND_EXC_ILE:
5099                 case OP_COND_EXC_ILE_UN:
5100                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
5101                         break;
5102                 case OP_COND_EXC_OV:
5103                 case OP_COND_EXC_NO:
5104                 case OP_COND_EXC_C:
5105                 case OP_COND_EXC_NC:
5106                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
5107                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
5108                         break;
5109                 case OP_COND_EXC_IOV:
5110                 case OP_COND_EXC_INO:
5111                 case OP_COND_EXC_IC:
5112                 case OP_COND_EXC_INC:
5113                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], 
5114                                                     (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
5115                         break;
5116
5117                 /* floating point opcodes */
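                     /*
                      * Note on the fp constants below: positive zero is materialized with a
                      * self-xor (no memory access). The mono_signbit () check is needed
                      * because -0.0 == 0.0 compares equal, yet -0.0 must be loaded from
                      * memory to preserve its sign bit. Other constants are loaded
                      * RIP-relative through a MONO_PATCH_INFO_R8/R4 patch.
                      */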
5118                 case OP_R8CONST: {
5119                         double d = *(double *)ins->inst_p0;
5120
5121                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
5122                                 amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
5123                         }
5124                         else {
5125                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
5126                                 amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
5127                         }
5128                         break;
5129                 }
5130                 case OP_R4CONST: {
5131                         float f = *(float *)ins->inst_p0;
5132
5133                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
5134                                 if (cfg->r4fp)
5135                                         amd64_sse_xorps_reg_reg (code, ins->dreg, ins->dreg);
5136                                 else
5137                                         amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
5138                         }
5139                         else {
5140                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
5141                                 amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
5142                                 if (!cfg->r4fp)
5143                                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5144                         }
5145                         break;
5146                 }
5147                 case OP_STORER8_MEMBASE_REG:
5148                         amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
5149                         break;
5150                 case OP_LOADR8_MEMBASE:
5151                         amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5152                         break;
5153                 case OP_STORER4_MEMBASE_REG:
5154                         if (cfg->r4fp) {
5155                                 amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
5156                         } else {
5157                                 /* This requires a double->single conversion */
5158                                 amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
5159                                 amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, MONO_ARCH_FP_SCRATCH_REG);
5160                         }
5161                         break;
5162                 case OP_LOADR4_MEMBASE:
5163                         if (cfg->r4fp) {
5164                                 amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5165                         } else {
5166                                 amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5167                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5168                         }
5169                         break;
5170                 case OP_ICONV_TO_R4:
5171                         if (cfg->r4fp) {
5172                                 amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5173                         } else {
5174                                 amd64_sse_cvtsi2ss_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5175                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5176                         }
5177                         break;
5178                 case OP_ICONV_TO_R8:
5179                         amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5180                         break;
5181                 case OP_LCONV_TO_R4:
5182                         if (cfg->r4fp) {
5183                                 amd64_sse_cvtsi2ss_reg_reg (code, ins->dreg, ins->sreg1);
5184                         } else {
5185                                 amd64_sse_cvtsi2ss_reg_reg (code, ins->dreg, ins->sreg1);
5186                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5187                         }
5188                         break;
5189                 case OP_LCONV_TO_R8:
5190                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
5191                         break;
5192                 case OP_FCONV_TO_R4:
5193                         if (cfg->r4fp) {
5194                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
5195                         } else {
5196                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
5197                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5198                         }
5199                         break;
5200                 case OP_FCONV_TO_I1:
5201                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
5202                         break;
5203                 case OP_FCONV_TO_U1:
5204                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
5205                         break;
5206                 case OP_FCONV_TO_I2:
5207                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
5208                         break;
5209                 case OP_FCONV_TO_U2:
5210                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
5211                         break;
5212                 case OP_FCONV_TO_U4:
5213                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);                  
5214                         break;
5215                 case OP_FCONV_TO_I4:
5216                 case OP_FCONV_TO_I:
5217                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
5218                         break;
5219                 case OP_FCONV_TO_I8:
5220                         code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
5221                         break;
5222
5223                 case OP_RCONV_TO_I1:
5224                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5225                         amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
5226                         break;
5227                 case OP_RCONV_TO_U1:
5228                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5229                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
5230                         break;
5231                 case OP_RCONV_TO_I2:
5232                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5233                         amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
5234                         break;
5235                 case OP_RCONV_TO_U2:
5236                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5237                         amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
5238                         break;
5239                 case OP_RCONV_TO_I4:
5240                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5241                         break;
5242                 case OP_RCONV_TO_U4:
5243                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
5244                         break;
5245                 case OP_RCONV_TO_I8:
5246                         amd64_sse_cvtss2si_reg_reg_size (code, ins->dreg, ins->sreg1, 8);
5247                         break;
5248                 case OP_RCONV_TO_R8:
5249                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->sreg1);
5250                         break;
5251                 case OP_RCONV_TO_R4:
5252                         if (ins->dreg != ins->sreg1)
5253                                 amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
5254                         break;
5255
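                     /*
                      * OP_LCONV_TO_R_UN: cvtsi2sd only converts signed values, so when the
                      * top bit is set the value is halved first: shift right by one, OR the
                      * dropped low bit back in (a sticky bit, so the halving rounds
                      * correctly), convert, then add the result to itself to double it.
                      * RAX and RCX are saved in the red zone (negative RSP offsets) instead
                      * of adjusting the stack pointer.
                      */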
5256                 case OP_LCONV_TO_R_UN: { 
5257                         guint8 *br [2];
5258
5259                         /* Based on gcc code */
5260                         amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
5261                         br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE);
5262
5263                         /* Positive case */
5264                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
5265                         br [1] = code; x86_jump8 (code, 0);
5266                         amd64_patch (br [0], code);
5267
5268                         /* Negative case */
5269                         /* Save to the red zone */
5270                         amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
5271                         amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
5272                         amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8);
5273                         amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
5274                         amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1);
5275                         amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1);
5276                         amd64_alu_reg_reg (code, X86_OR, AMD64_RAX, AMD64_RCX);
5277                         amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX);
5278                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg);
5279                         /* Restore */
5280                         amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
5281                         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8);
5282                         amd64_patch (br [1], code);
5283                         break;
5284                 }
5285                 case OP_LCONV_TO_OVF_U4:
5286                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
5287                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
5288                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
5289                         break;
5290                 case OP_LCONV_TO_OVF_I4_UN:
5291                         amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
5292                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
5293                         amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
5294                         break;
5295                 case OP_FMOVE:
5296                         if (ins->dreg != ins->sreg1)
5297                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
5298                         break;
5299                 case OP_RMOVE:
5300                         if (ins->dreg != ins->sreg1)
5301                                 amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg1);
5302                         break;
5303                 case OP_MOVE_F_TO_I4:
5304                         if (cfg->r4fp) {
5305                                 amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
5306                         } else {
5307                                 amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
5308                                 amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8);
5309                         }
5310                         break;
5311                 case OP_MOVE_I4_TO_F:
5312                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
5313                         if (!cfg->r4fp)
5314                                 amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5315                         break;
5316                 case OP_MOVE_F_TO_I8:
5317                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
5318                         break;
5319                 case OP_MOVE_I8_TO_F:
5320                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
5321                         break;
5322                 case OP_FADD:
5323                         amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
5324                         break;
5325                 case OP_FSUB:
5326                         amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
5327                         break;          
5328                 case OP_FMUL:
5329                         amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
5330                         break;          
5331                 case OP_FDIV:
5332                         amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
5333                         break;          
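                     /*
                      * OP_FNEG and OP_ABS below are pure sign-bit manipulation: negation
                      * xors the value with -0.0 (only the sign bit set) and abs ands it
                      * with 0x7fffffffffffffff (everything but the sign bit), both loaded
                      * RIP-relative.
                      */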
5334                 case OP_FNEG: {
5335                         static double r8_0 = -0.0;
5336
5337                         g_assert (ins->sreg1 == ins->dreg);
5338                                         
5339                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0);
5340                         amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
5341                         break;
5342                 }
5343                 case OP_SIN:
5344                         EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
5345                         break;          
5346                 case OP_COS:
5347                         EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
5348                         break;          
5349                 case OP_ABS: {
5350                         static guint64 d = 0x7fffffffffffffffUL;
5351
5352                         g_assert (ins->sreg1 == ins->dreg);
5353                                         
5354                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &d);
5355                         amd64_sse_andpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
5356                         break;          
5357                 }
5358                 case OP_SQRT:
5359                         EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
5360                         break;
5361
5362                 case OP_RADD:
5363                         amd64_sse_addss_reg_reg (code, ins->dreg, ins->sreg2);
5364                         break;
5365                 case OP_RSUB:
5366                         amd64_sse_subss_reg_reg (code, ins->dreg, ins->sreg2);
5367                         break;
5368                 case OP_RMUL:
5369                         amd64_sse_mulss_reg_reg (code, ins->dreg, ins->sreg2);
5370                         break;
5371                 case OP_RDIV:
5372                         amd64_sse_divss_reg_reg (code, ins->dreg, ins->sreg2);
5373                         break;
5374                 case OP_RNEG: {
5375                         static float r4_0 = -0.0;
5376
5377                         g_assert (ins->sreg1 == ins->dreg);
5378
5379                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, &r4_0);
5380                         amd64_sse_movss_reg_membase (code, MONO_ARCH_FP_SCRATCH_REG, AMD64_RIP, 0);
5381                         amd64_sse_xorps_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
5382                         break;
5383                 }
5384
5385                 case OP_IMIN:
5386                         g_assert (cfg->opt & MONO_OPT_CMOV);
5387                         g_assert (ins->dreg == ins->sreg1);
5388                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5389                         amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4);
5390                         break;
5391                 case OP_IMIN_UN:
5392                         g_assert (cfg->opt & MONO_OPT_CMOV);
5393                         g_assert (ins->dreg == ins->sreg1);
5394                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5395                         amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4);
5396                         break;
5397                 case OP_IMAX:
5398                         g_assert (cfg->opt & MONO_OPT_CMOV);
5399                         g_assert (ins->dreg == ins->sreg1);
5400                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5401                         amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4);
5402                         break;
5403                 case OP_IMAX_UN:
5404                         g_assert (cfg->opt & MONO_OPT_CMOV);
5405                         g_assert (ins->dreg == ins->sreg1);
5406                         amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
5407                         amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4);
5408                         break;
5409                 case OP_LMIN:
5410                         g_assert (cfg->opt & MONO_OPT_CMOV);
5411                         g_assert (ins->dreg == ins->sreg1);
5412                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5413                         amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
5414                         break;
5415                 case OP_LMIN_UN:
5416                         g_assert (cfg->opt & MONO_OPT_CMOV);
5417                         g_assert (ins->dreg == ins->sreg1);
5418                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5419                         amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
5420                         break;
5421                 case OP_LMAX:
5422                         g_assert (cfg->opt & MONO_OPT_CMOV);
5423                         g_assert (ins->dreg == ins->sreg1);
5424                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5425                         amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
5426                         break;
5427                 case OP_LMAX_UN:
5428                         g_assert (cfg->opt & MONO_OPT_CMOV);
5429                         g_assert (ins->dreg == ins->sreg1);
5430                         amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
5431                         amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
5432                         break;  
5433                 case OP_X86_FPOP:
5434                         break;          
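                     /*
                      * comisd/comiss set ZF/PF/CF from the comparison; PF is set only for
                      * an unordered result (a NaN operand), which is why the compare-result
                      * opcodes below branch on X86_CC_P before materializing their result.
                      */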
5435                 case OP_FCOMPARE:
5436                         /* 
5437                          * The two arguments are swapped because the fbranch instructions
5438                          * depend on this for the non-sse case to work.
5439                          */
5440                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5441                         break;
5442                 case OP_RCOMPARE:
5443                         /*
5444                          * FIXME: Get rid of this.
5445                          * The two arguments are swapped because the fbranch instructions
5446                          * depend on this for the non-sse case to work.
5447                          */
5448                         amd64_sse_comiss_reg_reg (code, ins->sreg2, ins->sreg1);
5449                         break;
5450                 case OP_FCNEQ:
5451                 case OP_FCEQ: {
5452                         /* zeroing the register at the start results in 
5453                          * shorter and faster code (we can also remove the widening op)
5454                          */
5455                         guchar *unordered_check;
5456
5457                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5458                         amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
5459                         unordered_check = code;
5460                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5461
5462                         if (ins->opcode == OP_FCEQ) {
5463                                 amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
5464                                 amd64_patch (unordered_check, code);
5465                         } else {
5466                                 guchar *jump_to_end;
5467                                 amd64_set_reg (code, X86_CC_NE, ins->dreg, FALSE);
5468                                 jump_to_end = code;
5469                                 x86_jump8 (code, 0);
5470                                 amd64_patch (unordered_check, code);
5471                                 amd64_inc_reg (code, ins->dreg);
5472                                 amd64_patch (jump_to_end, code);
5473                         }
5474                         break;
5475                 }
5476                 case OP_FCLT:
5477                 case OP_FCLT_UN: {
5478                         /* zeroing the register at the start results in 
5479                          * shorter and faster code (we can also remove the widening op)
5480                          */
5481                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5482                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5483                         if (ins->opcode == OP_FCLT_UN) {
5484                                 guchar *unordered_check = code;
5485                                 guchar *jump_to_end;
5486                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5487                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
5488                                 jump_to_end = code;
5489                                 x86_jump8 (code, 0);
5490                                 amd64_patch (unordered_check, code);
5491                                 amd64_inc_reg (code, ins->dreg);
5492                                 amd64_patch (jump_to_end, code);
5493                         } else {
5494                                 amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
5495                         }
5496                         break;
5497                 }
5498                 case OP_FCLE: {
5499                         guchar *unordered_check;
5500                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5501                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5502                         unordered_check = code;
5503                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5504                         amd64_set_reg (code, X86_CC_NB, ins->dreg, FALSE);
5505                         amd64_patch (unordered_check, code);
5506                         break;
5507                 }
5508                 case OP_FCGT:
5509                 case OP_FCGT_UN: {
5510                         /* zeroing the register at the start results in 
5511                          * shorter and faster code (we can also remove the widening op)
5512                          */
5513                         guchar *unordered_check;
5514
5515                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5516                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5517                         if (ins->opcode == OP_FCGT) {
5518                                 unordered_check = code;
5519                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5520                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
5521                                 amd64_patch (unordered_check, code);
5522                         } else {
5523                                 amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
5524                         }
5525                         break;
5526                 }
5527                 case OP_FCGE: {
5528                         guchar *unordered_check;
5529                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5530                         amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
5531                         unordered_check = code;
5532                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5533                         amd64_set_reg (code, X86_CC_NA, ins->dreg, FALSE);
5534                         amd64_patch (unordered_check, code);
5535                         break;
5536                 }
5537
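                     /*
                      * Single precision variants of the compares above: same scheme, using
                      * comiss. The condition codes look inverted (OP_RCGT uses X86_CC_LT and
                      * vice versa) because the operands are passed to comiss swapped.
                      */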
5538                 case OP_RCEQ:
5539                 case OP_RCGT:
5540                 case OP_RCLT:
5541                 case OP_RCLT_UN:
5542                 case OP_RCGT_UN: {
5543                         int x86_cond;
5544                         gboolean unordered = FALSE;
5545
5546                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5547                         amd64_sse_comiss_reg_reg (code, ins->sreg2, ins->sreg1);
5548
5549                         switch (ins->opcode) {
5550                         case OP_RCEQ:
5551                                 x86_cond = X86_CC_EQ;
5552                                 break;
5553                         case OP_RCGT:
5554                                 x86_cond = X86_CC_LT;
5555                                 break;
5556                         case OP_RCLT:
5557                                 x86_cond = X86_CC_GT;
5558                                 break;
5559                         case OP_RCLT_UN:
5560                                 x86_cond = X86_CC_GT;
5561                                 unordered = TRUE;
5562                                 break;
5563                         case OP_RCGT_UN:
5564                                 x86_cond = X86_CC_LT;
5565                                 unordered = TRUE;
5566                                 break;
5567                         default:
5568                                 g_assert_not_reached ();
5569                                 break;
5570                         }
5571
5572                         if (unordered) {
5573                                 guchar *unordered_check;
5574                                 guchar *jump_to_end;
5575
5576                                 unordered_check = code;
5577                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5578                                 amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
5579                                 jump_to_end = code;
5580                                 x86_jump8 (code, 0);
5581                                 amd64_patch (unordered_check, code);
5582                                 amd64_inc_reg (code, ins->dreg);
5583                                 amd64_patch (jump_to_end, code);
5584                         } else {
5585                                 amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
5586                         }
5587                         break;
5588                 }
5589                 case OP_FCLT_MEMBASE:
5590                 case OP_FCGT_MEMBASE:
5591                 case OP_FCLT_UN_MEMBASE:
5592                 case OP_FCGT_UN_MEMBASE:
5593                 case OP_FCEQ_MEMBASE: {
5594                         guchar *unordered_check, *jump_to_end;
5595                         int x86_cond;
5596
5597                         amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
5598                         amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
5599
5600                         switch (ins->opcode) {
5601                         case OP_FCEQ_MEMBASE:
5602                                 x86_cond = X86_CC_EQ;
5603                                 break;
5604                         case OP_FCLT_MEMBASE:
5605                         case OP_FCLT_UN_MEMBASE:
5606                                 x86_cond = X86_CC_LT;
5607                                 break;
5608                         case OP_FCGT_MEMBASE:
5609                         case OP_FCGT_UN_MEMBASE:
5610                                 x86_cond = X86_CC_GT;
5611                                 break;
5612                         default:
5613                                 g_assert_not_reached ();
5614                         }
5615
5616                         unordered_check = code;
5617                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5618                         amd64_set_reg (code, x86_cond, ins->dreg, FALSE);
5619
5620                         switch (ins->opcode) {
5621                         case OP_FCEQ_MEMBASE:
5622                         case OP_FCLT_MEMBASE:
5623                         case OP_FCGT_MEMBASE:
5624                                 amd64_patch (unordered_check, code);
5625                                 break;
5626                         case OP_FCLT_UN_MEMBASE:
5627                         case OP_FCGT_UN_MEMBASE:
5628                                 jump_to_end = code;
5629                                 x86_jump8 (code, 0);
5630                                 amd64_patch (unordered_check, code);
5631                                 amd64_inc_reg (code, ins->dreg);
5632                                 amd64_patch (jump_to_end, code);
5633                                 break;
5634                         default:
5635                                 break;
5636                         }
5637                         break;
5638                 }
5639                 case OP_FBEQ: {
5640                         guchar *jump = code;
5641                         x86_branch8 (code, X86_CC_P, 0, TRUE);
5642                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
5643                         amd64_patch (jump, code);
5644                         break;
5645                 }
5646                 case OP_FBNE_UN:
5647                         /* Branch if C013 != 100 */
5648                         /* branch if !ZF or (PF|CF) */
5649                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
5650                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
5651                         EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
5652                         break;
5653                 case OP_FBLT:
5654                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
5655                         break;
5656                 case OP_FBLT_UN:
5657                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
5658                         EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
5659                         break;
5660                 case OP_FBGT:
5661                 case OP_FBGT_UN:
5662                         if (ins->opcode == OP_FBGT) {
5663                                 guchar *br1;
5664
5665                                 /* skip branch if C1=1 */
5666                                 br1 = code;
5667                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
5668                                 /* branch if (C0 | C3) = 1 */
5669                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
5670                                 amd64_patch (br1, code);
5671                                 break;
5672                         } else {
5673                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
5674                         }
5675                         break;
5676                 case OP_FBGE: {
5677                         /* Branch if C013 == 100 or 001 */
5678                         guchar *br1;
5679
5680                         /* skip branch if C1=1 */
5681                         br1 = code;
5682                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5683                         /* branch if (C0 | C3) = 1 */
5684                         EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
5685                         amd64_patch (br1, code);
5686                         break;
5687                 }
5688                 case OP_FBGE_UN:
5689                         /* Branch if C013 == 000 */
5690                         EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
5691                         break;
5692                 case OP_FBLE: {
5693                         /* Branch if C013=000 or 100 */
5694                         guchar *br1;
5695
5696                         /* skip branch if C1=1 */
5697                         br1 = code;
5698                         x86_branch8 (code, X86_CC_P, 0, FALSE);
5699                         /* branch if C0=0 */
5700                         EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
5701                         amd64_patch (br1, code);
5702                         break;
5703                 }
5704                 case OP_FBLE_UN:
5705                         /* Branch if C013 != 001 */
5706                         EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
5707                         EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
5708                         break;
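                     /*
                      * OP_CKFINITE: the value is bounced through the stack into the x87 unit
                      * so fxam can classify it. After masking the status word with 0x4100
                      * (C3 | C0), the result equals X86_FP_C0 exactly for NaNs and
                      * infinities, so the EQ case raises the exception.
                      */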
5709                 case OP_CKFINITE:
5710                         /* Transfer value to the fp stack */
5711                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
5712                         amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
5713                         amd64_fld_membase (code, AMD64_RSP, 0, TRUE);
5714
5715                         amd64_push_reg (code, AMD64_RAX);
5716                         amd64_fxam (code);
5717                         amd64_fnstsw (code);
5718                         amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
5719                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
5720                         amd64_pop_reg (code, AMD64_RAX);
5721                         amd64_fstp (code, 0);
5722                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
5723                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
5724                         break;
5725                 case OP_TLS_GET: {
5726                         code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
5727                         break;
5728                 }
5729                 case OP_TLS_GET_REG:
5730                         code = emit_tls_get_reg (code, ins->dreg, ins->sreg1);
5731                         break;
5732                 case OP_TLS_SET: {
5733                         code = amd64_emit_tls_set (code, ins->sreg1, ins->inst_offset);
5734                         break;
5735                 }
5736                 case OP_TLS_SET_REG: {
5737                         code = amd64_emit_tls_set_reg (code, ins->sreg1, ins->sreg2);
5738                         break;
5739                 }
5740                 case OP_MEMORY_BARRIER: {
5741                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5742                                 x86_mfence (code);
5743                         break;
5744                 }
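                     /*
                      * OP_ATOMIC_ADD: lock xadd leaves the old memory value in dreg, so the
                      * addend is applied once more afterwards to produce the new value the
                      * IR expects. The temporary switches to R11 when dreg aliases one of
                      * the inputs, since dreg is clobbered before they are last read.
                      */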
5745                 case OP_ATOMIC_ADD_I4:
5746                 case OP_ATOMIC_ADD_I8: {
5747                         int dreg = ins->dreg;
5748                         guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;
5749
5750                         if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
5751                                 dreg = AMD64_R11;
5752
5753                         amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
5754                         amd64_prefix (code, X86_LOCK_PREFIX);
5755                         amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
5756                         /* dreg now holds the old value; add sreg2 to recover the new value */
5757                         amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);
5758                         
5759                         if (ins->dreg != dreg)
5760                                 amd64_mov_reg_reg (code, ins->dreg, dreg, size);
5761
5762                         break;
5763                 }
5764                 case OP_ATOMIC_EXCHANGE_I4:
5765                 case OP_ATOMIC_EXCHANGE_I8: {
5766                         guint32 size = ins->opcode == OP_ATOMIC_EXCHANGE_I4 ? 4 : 8;
5767
5768                         /* LOCK prefix is implied. */
5769                         amd64_mov_reg_reg (code, GP_SCRATCH_REG, ins->sreg2, size);
5770                         amd64_xchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, GP_SCRATCH_REG, size);
5771                         amd64_mov_reg_reg (code, ins->dreg, GP_SCRATCH_REG, size);
5772                         break;
5773                 }
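                     /*
                      * OP_ATOMIC_CAS: lock cmpxchg compares RAX (sreg3) with the memory
                      * operand; on a match sreg2 is stored, otherwise the memory value is
                      * loaded into RAX. Either way RAX ends up holding the value that was
                      * observed in memory, which is what gets moved into dreg.
                      */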
5774                 case OP_ATOMIC_CAS_I4:
5775                 case OP_ATOMIC_CAS_I8: {
5776                         guint32 size;
5777
5778                         if (ins->opcode == OP_ATOMIC_CAS_I8)
5779                                 size = 8;
5780                         else
5781                                 size = 4;
5782
5783                         /* 
5784                          * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
5785                          * an explanation of how this works.
5786                          */
5787                         g_assert (ins->sreg3 == AMD64_RAX);
5788                         g_assert (ins->sreg1 != AMD64_RAX);
5789                         g_assert (ins->sreg1 != ins->sreg2);
5790
5791                         amd64_prefix (code, X86_LOCK_PREFIX);
5792                         amd64_cmpxchg_membase_reg_size (code, ins->sreg1, ins->inst_offset, ins->sreg2, size);
5793
5794                         if (ins->dreg != AMD64_RAX)
5795                                 amd64_mov_reg_reg (code, ins->dreg, AMD64_RAX, size);
5796                         break;
5797                 }
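                     /*
                      * On amd64 ordinary aligned loads already have acquire semantics and
                      * ordinary stores have release semantics, so the atomic loads below
                      * compile to plain movs (with the appropriate sign/zero extension) and
                      * only sequentially consistent stores need the trailing mfence.
                      */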
5798                 case OP_ATOMIC_LOAD_I1: {
5799                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
5800                         break;
5801                 }
5802                 case OP_ATOMIC_LOAD_U1: {
5803                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
5804                         break;
5805                 }
5806                 case OP_ATOMIC_LOAD_I2: {
5807                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
5808                         break;
5809                 }
5810                 case OP_ATOMIC_LOAD_U2: {
5811                         amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
5812                         break;
5813                 }
5814                 case OP_ATOMIC_LOAD_I4: {
5815                         amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5816                         break;
5817                 }
5818                 case OP_ATOMIC_LOAD_U4:
5819                 case OP_ATOMIC_LOAD_I8:
5820                 case OP_ATOMIC_LOAD_U8: {
5821                         amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, ins->opcode == OP_ATOMIC_LOAD_U4 ? 4 : 8);
5822                         break;
5823                 }
5824                 case OP_ATOMIC_LOAD_R4: {
5825                         amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5826                         amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
5827                         break;
5828                 }
5829                 case OP_ATOMIC_LOAD_R8: {
5830                         amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
5831                         break;
5832                 }
5833                 case OP_ATOMIC_STORE_I1:
5834                 case OP_ATOMIC_STORE_U1:
5835                 case OP_ATOMIC_STORE_I2:
5836                 case OP_ATOMIC_STORE_U2:
5837                 case OP_ATOMIC_STORE_I4:
5838                 case OP_ATOMIC_STORE_U4:
5839                 case OP_ATOMIC_STORE_I8:
5840                 case OP_ATOMIC_STORE_U8: {
5841                         int size;
5842
5843                         switch (ins->opcode) {
5844                         case OP_ATOMIC_STORE_I1:
5845                         case OP_ATOMIC_STORE_U1:
5846                                 size = 1;
5847                                 break;
5848                         case OP_ATOMIC_STORE_I2:
5849                         case OP_ATOMIC_STORE_U2:
5850                                 size = 2;
5851                                 break;
5852                         case OP_ATOMIC_STORE_I4:
5853                         case OP_ATOMIC_STORE_U4:
5854                                 size = 4;
5855                                 break;
5856                         case OP_ATOMIC_STORE_I8:
5857                         case OP_ATOMIC_STORE_U8:
5858                                 size = 8;
5859                                 break;
5860                         }
5861
5862                         amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, size);
5863
5864                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5865                                 x86_mfence (code);
5866                         break;
5867                 }
5868                 case OP_ATOMIC_STORE_R4: {
5869                         amd64_sse_cvtsd2ss_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
5870                         amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, MONO_ARCH_FP_SCRATCH_REG);
5871
5872                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5873                                 x86_mfence (code);
5874                         break;
5875                 }
5876                 case OP_ATOMIC_STORE_R8: {
5877                         x86_nop (code);
5878                         x86_nop (code);
5879                         amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
5880                         x86_nop (code);
5881                         x86_nop (code);
5882
5883                         if (ins->backend.memory_barrier_kind == MONO_MEMORY_BARRIER_SEQ)
5884                                 x86_mfence (code);
5885                         break;
5886                 }
5887                 case OP_CARD_TABLE_WBARRIER: {
5888                         int ptr = ins->sreg1;
5889                         int value = ins->sreg2;
5890                         guchar *br = 0;
5891                         int nursery_shift, card_table_shift;
5892                         gpointer card_table_mask;
5893                         size_t nursery_size;
5894
5895                         gpointer card_table = mono_gc_get_card_table (&card_table_shift, &card_table_mask);
5896                         guint64 nursery_start = (guint64)mono_gc_get_nursery (&nursery_shift, &nursery_size);
5897                         guint64 shifted_nursery_start = nursery_start >> nursery_shift;
5898
5899                         /* If either operand points to the stack we can simply avoid the WB. This happens due to
5900                          * optimizations revealing a stack store that was not visible when op_cardtable was emitted.
5901                          */
5902                         if (ins->sreg1 == AMD64_RSP || ins->sreg2 == AMD64_RSP)
5903                                 continue;
5904
5905                         /*
5906                          * We need one register we can clobber: we choose RDX, and make sreg1
5907                          * fixed to RAX to work around limitations in the local register allocator.
5908                          * sreg2 might get allocated to RDX, but that is not a problem since
5909                          * we use it before clobbering RDX.
5910                          */
5911                         g_assert (ins->sreg1 == AMD64_RAX);
5912
5913                         /*
5914                          * This is the code we produce:
5915                          *
5916                          *   edx = value
5917                          *   edx >>= nursery_shift
5918                          *   cmp edx, (nursery_start >> nursery_shift)
5919                          *   jne done
5920                          *   edx = ptr
5921                          *   edx >>= card_table_shift
5922                          *   edx += cardtable
5923                          *   [edx] = 1
5924                          * done:
5925                          */
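                        /*
                         * Concretely, assuming SGen's 512 byte cards (card_table_shift == 9),
                         * the code below dirties the byte at card_table + ((ptr >> 9) & mask),
                         * where the mask is only applied when the table does not cover the
                         * whole address space.
                         */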
5926
5927                         if (mono_gc_card_table_nursery_check ()) {
5928                                 if (value != AMD64_RDX)
5929                                         amd64_mov_reg_reg (code, AMD64_RDX, value, 8);
5930                                 amd64_shift_reg_imm (code, X86_SHR, AMD64_RDX, nursery_shift);
5931                                 if (shifted_nursery_start >> 31) {
5932                                         /*
5933                                          * The value we need to compare against is 64 bits, so we need
5934                                          * another spare register.  We use RBX, which we save and
5935                                          * restore.
5936                                          */
5937                                         amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RBX, 8);
5938                                         amd64_mov_reg_imm (code, AMD64_RBX, shifted_nursery_start);
5939                                         amd64_alu_reg_reg (code, X86_CMP, AMD64_RDX, AMD64_RBX);
5940                                         amd64_mov_reg_membase (code, AMD64_RBX, AMD64_RSP, -8, 8);
5941                                 } else {
5942                                         amd64_alu_reg_imm (code, X86_CMP, AMD64_RDX, shifted_nursery_start);
5943                                 }
5944                                 br = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
5945                         }
5946                         amd64_mov_reg_reg (code, AMD64_RDX, ptr, 8);
5947                         amd64_shift_reg_imm (code, X86_SHR, AMD64_RDX, card_table_shift);
5948                         if (card_table_mask)
5949                                 amd64_alu_reg_imm (code, X86_AND, AMD64_RDX, (guint32)(guint64)card_table_mask);
5950
5951                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GC_CARD_TABLE_ADDR, card_table);
5952                         amd64_alu_reg_membase (code, X86_ADD, AMD64_RDX, AMD64_RIP, 0);
5953
5954                         amd64_mov_membase_imm (code, AMD64_RDX, 0, 1, 1);
5955
5956                         if (mono_gc_card_table_nursery_check ())
5957                                 x86_patch (br, code);
5958                         break;
5959                 }
5960 #ifdef MONO_ARCH_SIMD_INTRINSICS
5961                 /* TODO: Some of these IR opcodes are marked as no-clobber when they do in fact clobber. */
5962                 case OP_ADDPS:
5963                         amd64_sse_addps_reg_reg (code, ins->sreg1, ins->sreg2);
5964                         break;
5965                 case OP_DIVPS:
5966                         amd64_sse_divps_reg_reg (code, ins->sreg1, ins->sreg2);
5967                         break;
5968                 case OP_MULPS:
5969                         amd64_sse_mulps_reg_reg (code, ins->sreg1, ins->sreg2);
5970                         break;
5971                 case OP_SUBPS:
5972                         amd64_sse_subps_reg_reg (code, ins->sreg1, ins->sreg2);
5973                         break;
5974                 case OP_MAXPS:
5975                         amd64_sse_maxps_reg_reg (code, ins->sreg1, ins->sreg2);
5976                         break;
5977                 case OP_MINPS:
5978                         amd64_sse_minps_reg_reg (code, ins->sreg1, ins->sreg2);
5979                         break;
5980                 case OP_COMPPS:
5981                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
5982                         amd64_sse_cmpps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
5983                         break;
5984                 case OP_ANDPS:
5985                         amd64_sse_andps_reg_reg (code, ins->sreg1, ins->sreg2);
5986                         break;
5987                 case OP_ANDNPS:
5988                         amd64_sse_andnps_reg_reg (code, ins->sreg1, ins->sreg2);
5989                         break;
5990                 case OP_ORPS:
5991                         amd64_sse_orps_reg_reg (code, ins->sreg1, ins->sreg2);
5992                         break;
5993                 case OP_XORPS:
5994                         amd64_sse_xorps_reg_reg (code, ins->sreg1, ins->sreg2);
5995                         break;
5996                 case OP_SQRTPS:
5997                         amd64_sse_sqrtps_reg_reg (code, ins->dreg, ins->sreg1);
5998                         break;
5999                 case OP_RSQRTPS:
6000                         amd64_sse_rsqrtps_reg_reg (code, ins->dreg, ins->sreg1);
6001                         break;
6002                 case OP_RCPPS:
6003                         amd64_sse_rcpps_reg_reg (code, ins->dreg, ins->sreg1);
6004                         break;
6005                 case OP_ADDSUBPS:
6006                         amd64_sse_addsubps_reg_reg (code, ins->sreg1, ins->sreg2);
6007                         break;
6008                 case OP_HADDPS:
6009                         amd64_sse_haddps_reg_reg (code, ins->sreg1, ins->sreg2);
6010                         break;
6011                 case OP_HSUBPS:
6012                         amd64_sse_hsubps_reg_reg (code, ins->sreg1, ins->sreg2);
6013                         break;
6014                 case OP_DUPPS_HIGH:
6015                         amd64_sse_movshdup_reg_reg (code, ins->dreg, ins->sreg1);
6016                         break;
6017                 case OP_DUPPS_LOW:
6018                         amd64_sse_movsldup_reg_reg (code, ins->dreg, ins->sreg1);
6019                         break;
6020
6021                 case OP_PSHUFLEW_HIGH:
6022                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
6023                         amd64_sse_pshufhw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6024                         break;
6025                 case OP_PSHUFLEW_LOW:
6026                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
6027                         amd64_sse_pshuflw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6028                         break;
6029                 case OP_PSHUFLED:
6030                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
6031                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6032                         break;
6033                 case OP_SHUFPS:
6034                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
6035                         amd64_sse_shufps_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
6036                         break;
6037                 case OP_SHUFPD:
6038                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3);
6039                         amd64_sse_shufpd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
6040                         break;
6041
6042                 case OP_ADDPD:
6043                         amd64_sse_addpd_reg_reg (code, ins->sreg1, ins->sreg2);
6044                         break;
6045                 case OP_DIVPD:
6046                         amd64_sse_divpd_reg_reg (code, ins->sreg1, ins->sreg2);
6047                         break;
6048                 case OP_MULPD:
6049                         amd64_sse_mulpd_reg_reg (code, ins->sreg1, ins->sreg2);
6050                         break;
6051                 case OP_SUBPD:
6052                         amd64_sse_subpd_reg_reg (code, ins->sreg1, ins->sreg2);
6053                         break;
6054                 case OP_MAXPD:
6055                         amd64_sse_maxpd_reg_reg (code, ins->sreg1, ins->sreg2);
6056                         break;
6057                 case OP_MINPD:
6058                         amd64_sse_minpd_reg_reg (code, ins->sreg1, ins->sreg2);
6059                         break;
6060                 case OP_COMPPD:
6061                         g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
6062                         amd64_sse_cmppd_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
6063                         break;
6064                 case OP_ANDPD:
6065                         amd64_sse_andpd_reg_reg (code, ins->sreg1, ins->sreg2);
6066                         break;
6067                 case OP_ANDNPD:
6068                         amd64_sse_andnpd_reg_reg (code, ins->sreg1, ins->sreg2);
6069                         break;
6070                 case OP_ORPD:
6071                         amd64_sse_orpd_reg_reg (code, ins->sreg1, ins->sreg2);
6072                         break;
6073                 case OP_XORPD:
6074                         amd64_sse_xorpd_reg_reg (code, ins->sreg1, ins->sreg2);
6075                         break;
6076                 case OP_SQRTPD:
6077                         amd64_sse_sqrtpd_reg_reg (code, ins->dreg, ins->sreg1);
6078                         break;
6079                 case OP_ADDSUBPD:
6080                         amd64_sse_addsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
6081                         break;
6082                 case OP_HADDPD:
6083                         amd64_sse_haddpd_reg_reg (code, ins->sreg1, ins->sreg2);
6084                         break;
6085                 case OP_HSUBPD:
6086                         amd64_sse_hsubpd_reg_reg (code, ins->sreg1, ins->sreg2);
6087                         break;
6088                 case OP_DUPPD:
6089                         amd64_sse_movddup_reg_reg (code, ins->dreg, ins->sreg1);
6090                         break;
6091
6092                 case OP_EXTRACT_MASK:
6093                         amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
6094                         break;
6095
6096                 case OP_PAND:
6097                         amd64_sse_pand_reg_reg (code, ins->sreg1, ins->sreg2);
6098                         break;
6099                 case OP_POR:
6100                         amd64_sse_por_reg_reg (code, ins->sreg1, ins->sreg2);
6101                         break;
6102                 case OP_PXOR:
6103                         amd64_sse_pxor_reg_reg (code, ins->sreg1, ins->sreg2);
6104                         break;
6105
6106                 case OP_PADDB:
6107                         amd64_sse_paddb_reg_reg (code, ins->sreg1, ins->sreg2);
6108                         break;
6109                 case OP_PADDW:
6110                         amd64_sse_paddw_reg_reg (code, ins->sreg1, ins->sreg2);
6111                         break;
6112                 case OP_PADDD:
6113                         amd64_sse_paddd_reg_reg (code, ins->sreg1, ins->sreg2);
6114                         break;
6115                 case OP_PADDQ:
6116                         amd64_sse_paddq_reg_reg (code, ins->sreg1, ins->sreg2);
6117                         break;
6118
6119                 case OP_PSUBB:
6120                         amd64_sse_psubb_reg_reg (code, ins->sreg1, ins->sreg2);
6121                         break;
6122                 case OP_PSUBW:
6123                         amd64_sse_psubw_reg_reg (code, ins->sreg1, ins->sreg2);
6124                         break;
6125                 case OP_PSUBD:
6126                         amd64_sse_psubd_reg_reg (code, ins->sreg1, ins->sreg2);
6127                         break;
6128                 case OP_PSUBQ:
6129                         amd64_sse_psubq_reg_reg (code, ins->sreg1, ins->sreg2);
6130                         break;
6131
6132                 case OP_PMAXB_UN:
6133                         amd64_sse_pmaxub_reg_reg (code, ins->sreg1, ins->sreg2);
6134                         break;
6135                 case OP_PMAXW_UN:
6136                         amd64_sse_pmaxuw_reg_reg (code, ins->sreg1, ins->sreg2);
6137                         break;
6138                 case OP_PMAXD_UN:
6139                         amd64_sse_pmaxud_reg_reg (code, ins->sreg1, ins->sreg2);
6140                         break;
6141                 
6142                 case OP_PMAXB:
6143                         amd64_sse_pmaxsb_reg_reg (code, ins->sreg1, ins->sreg2);
6144                         break;
6145                 case OP_PMAXW:
6146                         amd64_sse_pmaxsw_reg_reg (code, ins->sreg1, ins->sreg2);
6147                         break;
6148                 case OP_PMAXD:
6149                         amd64_sse_pmaxsd_reg_reg (code, ins->sreg1, ins->sreg2);
6150                         break;
6151
6152                 case OP_PAVGB_UN:
6153                         amd64_sse_pavgb_reg_reg (code, ins->sreg1, ins->sreg2);
6154                         break;
6155                 case OP_PAVGW_UN:
6156                         amd64_sse_pavgw_reg_reg (code, ins->sreg1, ins->sreg2);
6157                         break;
6158
6159                 case OP_PMINB_UN:
6160                         amd64_sse_pminub_reg_reg (code, ins->sreg1, ins->sreg2);
6161                         break;
6162                 case OP_PMINW_UN:
6163                         amd64_sse_pminuw_reg_reg (code, ins->sreg1, ins->sreg2);
6164                         break;
6165                 case OP_PMIND_UN:
6166                         amd64_sse_pminud_reg_reg (code, ins->sreg1, ins->sreg2);
6167                         break;
6168
6169                 case OP_PMINB:
6170                         amd64_sse_pminsb_reg_reg (code, ins->sreg1, ins->sreg2);
6171                         break;
6172                 case OP_PMINW:
6173                         amd64_sse_pminsw_reg_reg (code, ins->sreg1, ins->sreg2);
6174                         break;
6175                 case OP_PMIND:
6176                         amd64_sse_pminsd_reg_reg (code, ins->sreg1, ins->sreg2);
6177                         break;
6178
6179                 case OP_PCMPEQB:
6180                         amd64_sse_pcmpeqb_reg_reg (code, ins->sreg1, ins->sreg2);
6181                         break;
6182                 case OP_PCMPEQW:
6183                         amd64_sse_pcmpeqw_reg_reg (code, ins->sreg1, ins->sreg2);
6184                         break;
6185                 case OP_PCMPEQD:
6186                         amd64_sse_pcmpeqd_reg_reg (code, ins->sreg1, ins->sreg2);
6187                         break;
6188                 case OP_PCMPEQQ:
6189                         amd64_sse_pcmpeqq_reg_reg (code, ins->sreg1, ins->sreg2);
6190                         break;
6191
6192                 case OP_PCMPGTB:
6193                         amd64_sse_pcmpgtb_reg_reg (code, ins->sreg1, ins->sreg2);
6194                         break;
6195                 case OP_PCMPGTW:
6196                         amd64_sse_pcmpgtw_reg_reg (code, ins->sreg1, ins->sreg2);
6197                         break;
6198                 case OP_PCMPGTD:
6199                         amd64_sse_pcmpgtd_reg_reg (code, ins->sreg1, ins->sreg2);
6200                         break;
6201                 case OP_PCMPGTQ:
6202                         amd64_sse_pcmpgtq_reg_reg (code, ins->sreg1, ins->sreg2);
6203                         break;
6204
6205                 case OP_PSUM_ABS_DIFF:
6206                         amd64_sse_psadbw_reg_reg (code, ins->sreg1, ins->sreg2);
6207                         break;
6208
6209                 case OP_UNPACK_LOWB:
6210                         amd64_sse_punpcklbw_reg_reg (code, ins->sreg1, ins->sreg2);
6211                         break;
6212                 case OP_UNPACK_LOWW:
6213                         amd64_sse_punpcklwd_reg_reg (code, ins->sreg1, ins->sreg2);
6214                         break;
6215                 case OP_UNPACK_LOWD:
6216                         amd64_sse_punpckldq_reg_reg (code, ins->sreg1, ins->sreg2);
6217                         break;
6218                 case OP_UNPACK_LOWQ:
6219                         amd64_sse_punpcklqdq_reg_reg (code, ins->sreg1, ins->sreg2);
6220                         break;
6221                 case OP_UNPACK_LOWPS:
6222                         amd64_sse_unpcklps_reg_reg (code, ins->sreg1, ins->sreg2);
6223                         break;
6224                 case OP_UNPACK_LOWPD:
6225                         amd64_sse_unpcklpd_reg_reg (code, ins->sreg1, ins->sreg2);
6226                         break;
6227
6228                 case OP_UNPACK_HIGHB:
6229                         amd64_sse_punpckhbw_reg_reg (code, ins->sreg1, ins->sreg2);
6230                         break;
6231                 case OP_UNPACK_HIGHW:
6232                         amd64_sse_punpckhwd_reg_reg (code, ins->sreg1, ins->sreg2);
6233                         break;
6234                 case OP_UNPACK_HIGHD:
6235                         amd64_sse_punpckhdq_reg_reg (code, ins->sreg1, ins->sreg2);
6236                         break;
6237                 case OP_UNPACK_HIGHQ:
6238                         amd64_sse_punpckhqdq_reg_reg (code, ins->sreg1, ins->sreg2);
6239                         break;
6240                 case OP_UNPACK_HIGHPS:
6241                         amd64_sse_unpckhps_reg_reg (code, ins->sreg1, ins->sreg2);
6242                         break;
6243                 case OP_UNPACK_HIGHPD:
6244                         amd64_sse_unpckhpd_reg_reg (code, ins->sreg1, ins->sreg2);
6245                         break;
6246
6247                 case OP_PACKW:
6248                         amd64_sse_packsswb_reg_reg (code, ins->sreg1, ins->sreg2);
6249                         break;
6250                 case OP_PACKD:
6251                         amd64_sse_packssdw_reg_reg (code, ins->sreg1, ins->sreg2);
6252                         break;
6253                 case OP_PACKW_UN:
6254                         amd64_sse_packuswb_reg_reg (code, ins->sreg1, ins->sreg2);
6255                         break;
6256                 case OP_PACKD_UN:
6257                         amd64_sse_packusdw_reg_reg (code, ins->sreg1, ins->sreg2);
6258                         break;
6259
6260                 case OP_PADDB_SAT_UN:
6261                         amd64_sse_paddusb_reg_reg (code, ins->sreg1, ins->sreg2);
6262                         break;
6263                 case OP_PSUBB_SAT_UN:
6264                         amd64_sse_psubusb_reg_reg (code, ins->sreg1, ins->sreg2);
6265                         break;
6266                 case OP_PADDW_SAT_UN:
6267                         amd64_sse_paddusw_reg_reg (code, ins->sreg1, ins->sreg2);
6268                         break;
6269                 case OP_PSUBW_SAT_UN:
6270                         amd64_sse_psubusw_reg_reg (code, ins->sreg1, ins->sreg2);
6271                         break;
6272
6273                 case OP_PADDB_SAT:
6274                         amd64_sse_paddsb_reg_reg (code, ins->sreg1, ins->sreg2);
6275                         break;
6276                 case OP_PSUBB_SAT:
6277                         amd64_sse_psubsb_reg_reg (code, ins->sreg1, ins->sreg2);
6278                         break;
6279                 case OP_PADDW_SAT:
6280                         amd64_sse_paddsw_reg_reg (code, ins->sreg1, ins->sreg2);
6281                         break;
6282                 case OP_PSUBW_SAT:
6283                         amd64_sse_psubsw_reg_reg (code, ins->sreg1, ins->sreg2);
6284                         break;
6285                         
6286                 case OP_PMULW:
6287                         amd64_sse_pmullw_reg_reg (code, ins->sreg1, ins->sreg2);
6288                         break;
6289                 case OP_PMULD:
6290                         amd64_sse_pmulld_reg_reg (code, ins->sreg1, ins->sreg2);
6291                         break;
6292                 case OP_PMULQ:
6293                         amd64_sse_pmuludq_reg_reg (code, ins->sreg1, ins->sreg2);
6294                         break;
6295                 case OP_PMULW_HIGH_UN:
6296                         amd64_sse_pmulhuw_reg_reg (code, ins->sreg1, ins->sreg2);
6297                         break;
6298                 case OP_PMULW_HIGH:
6299                         amd64_sse_pmulhw_reg_reg (code, ins->sreg1, ins->sreg2);
6300                         break;
6301
6302                 case OP_PSHRW:
6303                         amd64_sse_psrlw_reg_imm (code, ins->dreg, ins->inst_imm);
6304                         break;
6305                 case OP_PSHRW_REG:
6306                         amd64_sse_psrlw_reg_reg (code, ins->dreg, ins->sreg2);
6307                         break;
6308
6309                 case OP_PSARW:
6310                         amd64_sse_psraw_reg_imm (code, ins->dreg, ins->inst_imm);
6311                         break;
6312                 case OP_PSARW_REG:
6313                         amd64_sse_psraw_reg_reg (code, ins->dreg, ins->sreg2);
6314                         break;
6315
6316                 case OP_PSHLW:
6317                         amd64_sse_psllw_reg_imm (code, ins->dreg, ins->inst_imm);
6318                         break;
6319                 case OP_PSHLW_REG:
6320                         amd64_sse_psllw_reg_reg (code, ins->dreg, ins->sreg2);
6321                         break;
6322
6323                 case OP_PSHRD:
6324                         amd64_sse_psrld_reg_imm (code, ins->dreg, ins->inst_imm);
6325                         break;
6326                 case OP_PSHRD_REG:
6327                         amd64_sse_psrld_reg_reg (code, ins->dreg, ins->sreg2);
6328                         break;
6329
6330                 case OP_PSARD:
6331                         amd64_sse_psrad_reg_imm (code, ins->dreg, ins->inst_imm);
6332                         break;
6333                 case OP_PSARD_REG:
6334                         amd64_sse_psrad_reg_reg (code, ins->dreg, ins->sreg2);
6335                         break;
6336
6337                 case OP_PSHLD:
6338                         amd64_sse_pslld_reg_imm (code, ins->dreg, ins->inst_imm);
6339                         break;
6340                 case OP_PSHLD_REG:
6341                         amd64_sse_pslld_reg_reg (code, ins->dreg, ins->sreg2);
6342                         break;
6343
6344                 case OP_PSHRQ:
6345                         amd64_sse_psrlq_reg_imm (code, ins->dreg, ins->inst_imm);
6346                         break;
6347                 case OP_PSHRQ_REG:
6348                         amd64_sse_psrlq_reg_reg (code, ins->dreg, ins->sreg2);
6349                         break;
6350                 
6351                 /* TODO: This is a part of the SSE spec but not added
6352                 case OP_PSARQ:
6353                         amd64_sse_psraq_reg_imm (code, ins->dreg, ins->inst_imm);
6354                         break;
6355                 case OP_PSARQ_REG:
6356                         amd64_sse_psraq_reg_reg (code, ins->dreg, ins->sreg2);
6357                         break;  
6358                 */
6359         
6360                 case OP_PSHLQ:
6361                         amd64_sse_psllq_reg_imm (code, ins->dreg, ins->inst_imm);
6362                         break;
6363                 case OP_PSHLQ_REG:
6364                         amd64_sse_psllq_reg_reg (code, ins->dreg, ins->sreg2);
6365                         break;  
6366                 case OP_CVTDQ2PD:
6367                         amd64_sse_cvtdq2pd_reg_reg (code, ins->dreg, ins->sreg1);
6368                         break;
6369                 case OP_CVTDQ2PS:
6370                         amd64_sse_cvtdq2ps_reg_reg (code, ins->dreg, ins->sreg1);
6371                         break;
6372                 case OP_CVTPD2DQ:
6373                         amd64_sse_cvtpd2dq_reg_reg (code, ins->dreg, ins->sreg1);
6374                         break;
6375                 case OP_CVTPD2PS:
6376                         amd64_sse_cvtpd2ps_reg_reg (code, ins->dreg, ins->sreg1);
6377                         break;
6378                 case OP_CVTPS2DQ:
6379                         amd64_sse_cvtps2dq_reg_reg (code, ins->dreg, ins->sreg1);
6380                         break;
6381                 case OP_CVTPS2PD:
6382                         amd64_sse_cvtps2pd_reg_reg (code, ins->dreg, ins->sreg1);
6383                         break;
6384                 case OP_CVTTPD2DQ:
6385                         amd64_sse_cvttpd2dq_reg_reg (code, ins->dreg, ins->sreg1);
6386                         break;
6387                 case OP_CVTTPS2DQ:
6388                         amd64_sse_cvttps2dq_reg_reg (code, ins->dreg, ins->sreg1);
6389                         break;
6390
6391                 case OP_ICONV_TO_X:
6392                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
6393                         break;
6394                 case OP_EXTRACT_I4:
6395                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6396                         break;
6397                 case OP_EXTRACT_I8:
6398                         if (ins->inst_c0) {
6399                                 amd64_movhlps_reg_reg (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1);
6400                                 amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8);
6401                         } else {
6402                                 amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8);
6403                         }
6404                         break;
6405                 case OP_EXTRACT_I1:
6406                 case OP_EXTRACT_U1:
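                        /* copy the low dword, shift the selected byte into the low byte, then widen it */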
6407                         amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6408                         if (ins->inst_c0)
6409                                 amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
6410                         amd64_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
6411                         break;
6412                 case OP_EXTRACT_I2:
6413                 case OP_EXTRACT_U2:
6414                         /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6415                         if (ins->inst_c0)
6416                                 amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
6417                         amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6418                         amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE, 4);
6419                         break;
6420                 case OP_EXTRACT_R8:
6421                         if (ins->inst_c0)
6422                                 amd64_movhlps_reg_reg (code, ins->dreg, ins->sreg1);
6423                         else
6424                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6425                         break;
6426                 case OP_INSERT_I2:
6427                         amd64_sse_pinsrw_reg_reg_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0);
6428                         break;
6429                 case OP_EXTRACTX_U2:
6430                         amd64_sse_pextrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
6431                         break;
6432                 case OP_INSERTX_U1_SLOW:
6433                         /* sreg1 is the extracted ireg (scratch),
6434                          * sreg2 is the ireg to be inserted (scratch),
6435                          * dreg is the xreg to receive the value */
6436
6437                         /*clear the bits from the extracted word*/
6438                         amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
6439                         /*shift the value to insert if needed*/
6440                         if (ins->inst_c0 & 1)
6441                                 amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4);
6442                         /*join them together*/
6443                         amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
6444                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
6445                         break;
6446                 case OP_INSERTX_I4_SLOW:
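                        /* no SSE4.1 pinsrd here: insert the dword as two pinsrw halves */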
6447                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
6448                         amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
6449                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
6450                         break;
6451                 case OP_INSERTX_I8_SLOW:
6452                         amd64_movd_xreg_reg_size(code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg2, 8);
6453                         if (ins->inst_c0)
6454                                 amd64_movlhps_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
6455                         else
6456                                 amd64_sse_movsd_reg_reg (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG);
6457                         break;
6458
6459                 case OP_INSERTX_R4_SLOW:
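                        /*
                         * movss/cvtsd2ss can only write lane 0, so for lanes 1-3 shuffle the
                         * target lane into position 0, insert, then undo it with the same
                         * (self-inverse) shuffle.
                         */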
6460                         switch (ins->inst_c0) {
6461                         case 0:
6462                                 if (cfg->r4fp)
6463                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6464                                 else
6465                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6466                                 break;
6467                         case 1:
6468                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
6469                                 if (cfg->r4fp)
6470                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6471                                 else
6472                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6473                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3));
6474                                 break;
6475                         case 2:
6476                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
6477                                 if (cfg->r4fp)
6478                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6479                                 else
6480                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6481                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3));
6482                                 break;
6483                         case 3:
6484                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
6485                                 if (cfg->r4fp)
6486                                         amd64_sse_movss_reg_reg (code, ins->dreg, ins->sreg2);
6487                                 else
6488                                         amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg2);
6489                                 amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0));
6490                                 break;
6491                         }
6492                         break;
6493                 case OP_INSERTX_R8_SLOW:
6494                         if (ins->inst_c0)
6495                                 amd64_movlhps_reg_reg (code, ins->dreg, ins->sreg2);
6496                         else
6497                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg2);
6498                         break;
6499                 case OP_STOREX_MEMBASE_REG:
6500                 case OP_STOREX_MEMBASE:
6501                         amd64_sse_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
6502                         break;
6503                 case OP_LOADX_MEMBASE:
6504                         amd64_sse_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
6505                         break;
6506                 case OP_LOADX_ALIGNED_MEMBASE:
6507                         amd64_sse_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
6508                         break;
6509                 case OP_STOREX_ALIGNED_MEMBASE_REG:
6510                         amd64_sse_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
6511                         break;
6512                 case OP_STOREX_NTA_MEMBASE_REG:
6513                         amd64_sse_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
6514                         break;
6515                 case OP_PREFETCH_MEMBASE:
6516                         amd64_sse_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
6517                         break;
6518
6519                 case OP_XMOVE:
6520                         /* FIXME: the peephole pass should have killed this */
6521                         if (ins->dreg != ins->sreg1)
6522                                 amd64_sse_movaps_reg_reg (code, ins->dreg, ins->sreg1);
6523                         break;          
6524                 case OP_XZERO:
6525                         amd64_sse_pxor_reg_reg (code, ins->dreg, ins->dreg);
6526                         break;
6527                 case OP_ICONV_TO_R4_RAW:
6528                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
6529                         break;
6530
6531                 case OP_FCONV_TO_R8_X:
6532                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6533                         break;
6534
6535                 case OP_XCONV_R8_TO_I4:
6536                         amd64_sse_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
6537                         switch (ins->backend.source_opcode) {
6538                         case OP_FCONV_TO_I1:
6539                                 amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
6540                                 break;
6541                         case OP_FCONV_TO_U1:
6542                                 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
6543                                 break;
6544                         case OP_FCONV_TO_I2:
6545                                 amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
6546                                 break;
6547                         case OP_FCONV_TO_U2:
6548                                 amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
6549                                 break;
6550                         }                       
6551                         break;
6552
6553                 case OP_EXPAND_I2:
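                        /* place the word in lanes 0 and 1, then broadcast dword 0 to all lanes */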
6554                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 0);
6555                         amd64_sse_pinsrw_reg_reg_imm (code, ins->dreg, ins->sreg1, 1);
6556                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
6557                         break;
6558                 case OP_EXPAND_I4:
6559                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
6560                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
6561                         break;
6562                 case OP_EXPAND_I8:
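                        /* shuffle 0x44 selects dwords 0,1,0,1, duplicating the low qword */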
6563                         amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8);
6564                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
6565                         break;
6566                 case OP_EXPAND_R4:
6567                         if (cfg->r4fp) {
6568                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6569                         } else {
6570                                 amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6571                                 amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->dreg);
6572                         }
6573                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0);
6574                         break;
6575                 case OP_EXPAND_R8:
6576                         amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
6577                         amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
6578                         break;
6579 #endif
6580                 case OP_LIVERANGE_START: {
6581                         if (cfg->verbose_level > 1)
6582                                 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
6583                         MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
6584                         break;
6585                 }
6586                 case OP_LIVERANGE_END: {
6587                         if (cfg->verbose_level > 1)
6588                                 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
6589                         MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
6590                         break;
6591                 }
6592                 case OP_GC_SAFE_POINT: {
6593                         const char *polling_func = NULL;
6594                         int compare_val = 0;
6595                         guint8 *br [1];
6596
6597 #if defined (USE_COOP_GC)
6598                         polling_func = "mono_threads_state_poll";
6599                         compare_val = 1;
6600 #elif defined(__native_client_codegen__) && defined(__native_client_gc__)
6601                         polling_func = "mono_nacl_gc";
6602                         compare_val = 0xFFFFFFFF;
6603 #endif
6604                         if (!polling_func)
6605                                 break;
6606
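                        /* fast path: skip the poll call while the flag word at [sreg1] is clear */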
6607                         amd64_test_membase_imm_size (code, ins->sreg1, 0, compare_val, 4);
6608                         br[0] = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
6609                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, polling_func, FALSE);
6610                         amd64_patch (br[0], code);
6611                         break;
6612                 }
6613
6614                 case OP_GC_LIVENESS_DEF:
6615                 case OP_GC_LIVENESS_USE:
6616                 case OP_GC_PARAM_SLOT_LIVENESS_DEF:
6617                         ins->backend.pc_offset = code - cfg->native_code;
6618                         break;
6619                 case OP_GC_SPILL_SLOT_LIVENESS_DEF:
6620                         ins->backend.pc_offset = code - cfg->native_code;
6621                         bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins);
6622                         break;
6623                 default:
6624                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
6625                         g_assert_not_reached ();
6626                 }
6627
6628                 if ((code - cfg->native_code - offset) > max_len) {
6629 #if !defined(__native_client_codegen__)
6630                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
6631                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
6632                         g_assert_not_reached ();
6633 #endif
6634                 }
6635         }
6636
6637         cfg->code_len = code - cfg->native_code;
6638 }
6639
6640 #endif /* DISABLE_JIT */
6641
6642 void
6643 mono_arch_register_lowlevel_calls (void)
6644 {
6645         /* The signature doesn't matter */
6646         mono_register_jit_icall (mono_amd64_throw_exception, "mono_amd64_throw_exception", mono_create_icall_signature ("void"), TRUE);
6647 }
6648
6649 void
6650 mono_arch_patch_code_new (MonoCompile *cfg, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gpointer target)
6651 {
6652         unsigned char *ip = ji->ip.i + code;
6653
6654         /*
6655          * Debug code to help track down problems where the target of a near call
6656          * is not valid.
6657          */
6658         if (amd64_is_near_call (ip)) {
6659                 gint64 disp = (guint8*)target - (guint8*)ip;
6660
6661                 if (!amd64_is_imm32 (disp)) {
6662                         printf ("TYPE: %d\n", ji->type);
6663                         switch (ji->type) {
6664                         case MONO_PATCH_INFO_INTERNAL_METHOD:
6665                                 printf ("V: %s\n", ji->data.name);
6666                                 break;
6667                         case MONO_PATCH_INFO_METHOD_JUMP:
6668                         case MONO_PATCH_INFO_METHOD:
6669                                 printf ("V: %s\n", ji->data.method->name);
6670                                 break;
6671                         default:
6672                                 break;
6673                         }
6674                 }
6675         }
6676
6677         amd64_patch (ip, (gpointer)target);
6678 }
6679
6680 #ifndef DISABLE_JIT
6681
6682 static int
6683 get_max_epilog_size (MonoCompile *cfg)
6684 {
6685         int max_epilog_size = 16;
6686         
6687         if (cfg->method->save_lmf)
6688                 max_epilog_size += 256;
6689         
6690         if (mono_jit_trace_calls != NULL)
6691                 max_epilog_size += 50;
6692
6693         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
6694                 max_epilog_size += 50;
6695
6696         max_epilog_size += (AMD64_NREG * 2);
6697
6698         return max_epilog_size;
6699 }
6700
6701 /*
6702  * This macro is used for testing whether the unwinder works correctly at every point
6703  * where an async exception can happen.
6704  */
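/*
 * mono_inject_async_exc_method and mono_inject_async_exc_pos are assumed to be
 * set from the runtime's debug options (they are defined in mini.c), so a fault
 * can be injected at the N-th async point of a chosen method.
 */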
6705 /* This will generate a SIGSEGV at the given point in the code */
6706 #define async_exc_point(code) do { \
6707     if (mono_inject_async_exc_method && mono_method_desc_full_match (mono_inject_async_exc_method, cfg->method)) { \
6708          if (cfg->arch.async_point_count == mono_inject_async_exc_pos) \
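             /* a 4 byte load from address 0 generates the SIGSEGV */ \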
6709              amd64_mov_reg_mem (code, AMD64_RAX, 0, 4); \
6710          cfg->arch.async_point_count ++; \
6711     } \
6712 } while (0)
6713
6714 guint8 *
6715 mono_arch_emit_prolog (MonoCompile *cfg)
6716 {
6717         MonoMethod *method = cfg->method;
6718         MonoBasicBlock *bb;
6719         MonoMethodSignature *sig;
6720         MonoInst *ins;
6721         int alloc_size, pos, i, cfa_offset, quad, max_epilog_size, save_area_offset;
6722         guint8 *code;
6723         CallInfo *cinfo;
6724         MonoInst *lmf_var = cfg->lmf_var;
6725         gboolean args_clobbered = FALSE;
6726         gboolean trace = FALSE;
6727 #ifdef __native_client_codegen__
6728         guint alignment_check;
6729 #endif
6730
6731         cfg->code_size = MAX (cfg->header->code_size * 4, 1024);
6732
6733 #if defined(__default_codegen__)
6734         code = cfg->native_code = g_malloc (cfg->code_size);
6735 #elif defined(__native_client_codegen__)
6736         /* native_code_alloc is not 32-byte aligned, native_code is. */
6737         cfg->native_code_alloc = g_malloc (cfg->code_size + kNaClAlignment);
6738
6739         /* Align native_code to the next kNaClAlignment boundary. */
6740         cfg->native_code = (uintptr_t)cfg->native_code_alloc + kNaClAlignment;
6741         cfg->native_code = (uintptr_t)cfg->native_code & ~kNaClAlignmentMask;
6742
6743         code = cfg->native_code;
6744
6745         alignment_check = (guint)cfg->native_code & kNaClAlignmentMask;
6746         g_assert (alignment_check == 0);
6747 #endif
6748
6749         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
6750                 trace = TRUE;
6751
6752         /* Amount of stack space allocated by register saving code */
6753         pos = 0;
6754
6755         /* Offset between RSP and the CFA */
6756         cfa_offset = 0;
6757
6758         /* 
6759          * The prolog consists of the following parts:
6760          * FP present:
6761          * - push rbp, mov rbp, rsp
6762          * - save callee saved regs using pushes
6763          * - allocate frame
6764          * - save rgctx if needed
6765          * - save lmf if needed
6766          * FP not present:
6767          * - allocate frame
6768          * - save rgctx if needed
6769          * - save lmf if needed
6770          * - save callee saved regs using moves
6771          */
6772
6773         // CFA = sp + 8
6774         cfa_offset = 8;
6775         mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
6776         // IP saved at CFA - 8
6777         mono_emit_unwind_op_offset (cfg, code, AMD64_RIP, -cfa_offset);
6778         async_exc_point (code);
6779         mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF);
6780
6781         if (!cfg->arch.omit_fp) {
6782                 amd64_push_reg (code, AMD64_RBP);
6783                 cfa_offset += 8;
6784                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6785                 mono_emit_unwind_op_offset (cfg, code, AMD64_RBP, - cfa_offset);
6786                 async_exc_point (code);
6787 #ifdef TARGET_WIN32
6788                 mono_arch_unwindinfo_add_push_nonvol (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP);
6789 #endif
6790                 /* These are handled automatically by the stack marking code */
6791                 mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset, SLOT_NOREF);
6792                 
6793                 amd64_mov_reg_reg (code, AMD64_RBP, AMD64_RSP, sizeof(mgreg_t));
6794                 mono_emit_unwind_op_def_cfa_reg (cfg, code, AMD64_RBP);
6795                 async_exc_point (code);
6796 #ifdef TARGET_WIN32
6797                 mono_arch_unwindinfo_add_set_fpreg (&cfg->arch.unwindinfo, cfg->native_code, code, AMD64_RBP);
6798 #endif
6799         }
6800
6801         /* The param area is always at offset 0 from sp */
6802         /* This needs to be allocated here, since it has to come after the spill area */
6803         if (cfg->param_area) {
6804                 if (cfg->arch.omit_fp)
6805                         // FIXME:
6806                         g_assert_not_reached ();
6807                 cfg->stack_offset += ALIGN_TO (cfg->param_area, sizeof(mgreg_t));
6808         }
6809
6810         if (cfg->arch.omit_fp) {
6811                 /* 
6812                  * On entry, the stack is misaligned by the push of the return
6813                  * address. It is made aligned either by the push of %rbp, or by
6814                  * this adjustment.
6815                  */
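                /*
                 * e.g. stack_offset == 16: alloc_size stays 16, 16 % 16 == 0 so 8 is added,
                 * and rsp ends up 8 (return address) + 24 == 32 bytes below the pre-call
                 * rsp, a multiple of 16.
                 */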
6816                 alloc_size = ALIGN_TO (cfg->stack_offset, 8);
6817                 if ((alloc_size % 16) == 0) {
6818                         alloc_size += 8;
6819                         /* Mark the padding slot as NOREF */
6820                         mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset - sizeof (mgreg_t), SLOT_NOREF);
6821                 }
6822         } else {
6823                 alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
6824                 if (cfg->stack_offset != alloc_size) {
6825                         /* Mark the padding slot as NOREF */
6826                         mini_gc_set_slot_type_from_fp (cfg, -alloc_size + cfg->param_area, SLOT_NOREF);
6827                 }
6828                 cfg->arch.sp_fp_offset = alloc_size;
6829                 alloc_size -= pos;
6830         }
6831
6832         cfg->arch.stack_alloc_size = alloc_size;
6833
6834         /* Allocate stack frame */
6835         if (alloc_size) {
6836                 /* See mono_emit_stack_alloc */
6837 #if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
6838                 guint32 remaining_size = alloc_size;
6839                 /* FIXME: handle unbounded code expansion; we should use a loop in case of more than X iterations */
6840                 guint32 required_code_size = ((remaining_size / 0x1000) + 1) * 10; /*10 is the max size of amd64_alu_reg_imm + amd64_test_membase_reg*/
6841                 guint32 offset = code - cfg->native_code;
6842                 if (G_UNLIKELY (required_code_size >= (cfg->code_size - offset))) {
6843                         while (required_code_size >= (cfg->code_size - offset))
6844                                 cfg->code_size *= 2;
6845                         cfg->native_code = mono_realloc_native_code (cfg);
6846                         code = cfg->native_code + offset;
6847                         cfg->stat_code_reallocs++;
6848                 }
6849
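                /*
                 * Grow the stack one page at a time, touching each new page so the OS
                 * guard-page mechanism commits it before the next 4K step.
                 */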
6850                 while (remaining_size >= 0x1000) {
6851                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
6852                         if (cfg->arch.omit_fp) {
6853                                 cfa_offset += 0x1000;
6854                                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6855                         }
6856                         async_exc_point (code);
6857 #ifdef TARGET_WIN32
6858                         if (cfg->arch.omit_fp) 
6859                                 mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, 0x1000);
6860 #endif
6861
6862                         amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
6863                         remaining_size -= 0x1000;
6864                 }
6865                 if (remaining_size) {
6866                         amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, remaining_size);
6867                         if (cfg->arch.omit_fp) {
6868                                 cfa_offset += remaining_size;
6869                                 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6870                                 async_exc_point (code);
6871                         }
6872 #ifdef TARGET_WIN32
6873                         if (cfg->arch.omit_fp) 
6874                                 mono_arch_unwindinfo_add_alloc_stack (&cfg->arch.unwindinfo, cfg->native_code, code, remaining_size);
6875 #endif
6876                 }
6877 #else
6878                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, alloc_size);
6879                 if (cfg->arch.omit_fp) {
6880                         cfa_offset += alloc_size;
6881                         mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
6882                         async_exc_point (code);
6883                 }
6884 #endif
6885         }
6886
6887         /* Stack alignment check */
6888 #if 0
6889         {
6890                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_RSP, 8);
6891                 amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0xf);
6892                 amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, 0);
6893                 x86_branch8 (code, X86_CC_EQ, 2, FALSE);
6894                 amd64_breakpoint (code);
6895         }
6896 #endif
6897
6898         if (mini_get_debug_options ()->init_stacks) {
6899                 /* Fill the stack frame with a dummy value to force deterministic behavior */
6900         
6901                 /* Save registers to the red zone */
6902                 amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDI, 8);
6903                 amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
6904
6905                 amd64_mov_reg_imm (code, AMD64_RAX, 0x2a2a2a2a2a2a2a2a);
6906                 amd64_mov_reg_imm (code, AMD64_RCX, alloc_size / 8);
6907                 amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RSP, 8);
6908
6909                 amd64_cld (code);
6910 #if defined(__default_codegen__)
6911                 amd64_prefix (code, X86_REP_PREFIX);
6912                 amd64_stosl (code);
6913 #elif defined(__native_client_codegen__)
6914                 /* NaCl stos pseudo-instruction */
6915                 amd64_codegen_pre (code);
6916                 /* First, clear the upper 32 bits of RDI (mov %edi, %edi)  */
6917                 amd64_mov_reg_reg (code, AMD64_RDI, AMD64_RDI, 4);
6918                 /* Add %r15 to %rdi using lea, condition flags unaffected. */
6919                 amd64_lea_memindex_size (code, AMD64_RDI, AMD64_R15, 0, AMD64_RDI, 0, 8);
6920                 amd64_prefix (code, X86_REP_PREFIX);
6921                 amd64_stosl (code);
6922                 amd64_codegen_post (code);
6923 #endif /* __native_client_codegen__ */
6924
6925                 amd64_mov_reg_membase (code, AMD64_RDI, AMD64_RSP, -8, 8);
6926                 amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
6927         }
6928
6929         /* Save LMF */
6930         if (method->save_lmf)
6931                 code = emit_setup_lmf (cfg, code, lmf_var->inst_offset, cfa_offset);
6932
6933         /* Save callee saved registers */
6934         if (cfg->arch.omit_fp) {
6935                 save_area_offset = cfg->arch.reg_save_area_offset;
6936                 /* Save callee saved registers after sp is adjusted */
6937                 /* The registers are saved at the bottom of the frame */
6938                 /* FIXME: Optimize this so the regs are saved at the end of the frame in increasing order */
6939         } else {
6940                 /* The registers are saved just below the saved rbp */
6941                 save_area_offset = cfg->arch.reg_save_area_offset;
6942         }
6943
6944         for (i = 0; i < AMD64_NREG; ++i) {
6945                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
6946                         amd64_mov_membase_reg (code, cfg->frame_reg, save_area_offset, i, 8);
6947
6948                         if (cfg->arch.omit_fp) {
6949                                 mono_emit_unwind_op_offset (cfg, code, i, - (cfa_offset - save_area_offset));
6950                                 /* These are handled automatically by the stack marking code */
6951                                 mini_gc_set_slot_type_from_cfa (cfg, - (cfa_offset - save_area_offset), SLOT_NOREF);
6952                         } else {
6953                                 mono_emit_unwind_op_offset (cfg, code, i, - (-save_area_offset + (2 * 8)));
6954                                 // FIXME: GC
6955                         }
6956
6957                         save_area_offset += 8;
6958                         async_exc_point (code);
6959                 }
6960         }
6961
6962         /* store runtime generic context */
6963         if (cfg->rgctx_var) {
6964                 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET &&
6965                                 (cfg->rgctx_var->inst_basereg == AMD64_RBP || cfg->rgctx_var->inst_basereg == AMD64_RSP));
6966
6967                 amd64_mov_membase_reg (code, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, sizeof(gpointer));
6968
6969                 mono_add_var_location (cfg, cfg->rgctx_var, TRUE, MONO_ARCH_RGCTX_REG, 0, 0, code - cfg->native_code);
6970                 mono_add_var_location (cfg, cfg->rgctx_var, FALSE, cfg->rgctx_var->inst_basereg, cfg->rgctx_var->inst_offset, code - cfg->native_code, 0);
6971         }
6972
6973         /* compute max_length in order to use short forward jumps */
6974         max_epilog_size = get_max_epilog_size (cfg);
6975         if (cfg->opt & MONO_OPT_BRANCH) {
6976                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
6977                         MonoInst *ins;
6978                         int max_length = 0;
6979
6980                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
6981                                 max_length += 6;
6982                         /* max alignment for loops */
6983                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
6984                                 max_length += LOOP_ALIGNMENT;
6985 #ifdef __native_client_codegen__
6986                         /* max alignment for native client */
6987                         max_length += kNaClAlignment;
6988 #endif
6989
6990                         MONO_BB_FOR_EACH_INS (bb, ins) {
6991 #ifdef __native_client_codegen__
6992                                 {
6993                                         int space_in_block = kNaClAlignment -
6994                                                 ((max_length + cfg->code_len) & kNaClAlignmentMask);
6995                                         int max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
6996                                         if (space_in_block < max_len && max_len < kNaClAlignment) {
6997                                                 max_length += space_in_block;
6998                                         }
6999                                 }
7000 #endif  /*__native_client_codegen__*/
7001                                 max_length += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
7002                         }
7003
7004                         /* Take prolog and epilog instrumentation into account */
7005                         if (bb == cfg->bb_entry || bb == cfg->bb_exit)
7006                                 max_length += max_epilog_size;
7007                         
7008                         bb->max_length = max_length;
7009                 }
7010         }
7011
7012         sig = mono_method_signature (method);
7013         pos = 0;
7014
7015         cinfo = cfg->arch.cinfo;
7016
7017         if (sig->ret->type != MONO_TYPE_VOID) {
7018                 /* Save the hidden vret address argument to the stack */
7019                 if (cfg->vret_addr && (cfg->vret_addr->opcode != OP_REGVAR))
7020                         amd64_mov_membase_reg (code, cfg->vret_addr->inst_basereg, cfg->vret_addr->inst_offset, cinfo->ret.reg, 8);
7021         }
7022
7023         /* Keep this in sync with emit_load_volatile_arguments */
7024         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
7025                 ArgInfo *ainfo = cinfo->args + i;
7026
7027                 ins = cfg->args [i];
7028
7029                 if ((ins->flags & MONO_INST_IS_DEAD) && !trace)
7030                         /* Unused arguments */
7031                         continue;
7032
7033                 /* Save volatile arguments to the stack */
7034                 if (ins->opcode != OP_REGVAR) {
7035                         switch (ainfo->storage) {
7036                         case ArgInIReg: {
7037                                 guint32 size = 8;
7038
7039                                 /* FIXME: I1 etc */
7040                                 /*
7041                                 if (stack_offset & 0x1)
7042                                         size = 1;
7043                                 else if (stack_offset & 0x2)
7044                                         size = 2;
7045                                 else if (stack_offset & 0x4)
7046                                         size = 4;
7047                                 else
7048                                         size = 8;
7049                                 */
7050                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg, size);
7051
7052                                 /*
7053                                  * Save the original location of 'this',
7054                                  * get_generic_info_from_stack_frame () needs this to properly look up
7055                                  * the argument value during the handling of async exceptions.
7056                                  */
7057                                 if (ins == cfg->args [0]) {
7058                                         mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
7059                                         mono_add_var_location (cfg, ins, FALSE, ins->inst_basereg, ins->inst_offset, code - cfg->native_code, 0);
7060                                 }
7061                                 break;
7062                         }
7063                         case ArgInFloatSSEReg:
7064                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
7065                                 break;
7066                         case ArgInDoubleSSEReg:
7067                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset, ainfo->reg);
7068                                 break;
7069                         case ArgValuetypeInReg:
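                                /* A value type passed in registers is split into up to two
                                 * "quads" (the SysV ABI's eightbytes), each in its own
                                 * register; store each quad into its half of the stack slot. */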
7070                                 for (quad = 0; quad < 2; quad ++) {
7071                                         switch (ainfo->pair_storage [quad]) {
7072                                         case ArgInIReg:
7073                                                 amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad], sizeof(mgreg_t));
7074                                                 break;
7075                                         case ArgInFloatSSEReg:
7076                                                 amd64_movss_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]);
7077                                                 break;
7078                                         case ArgInDoubleSSEReg:
7079                                                 amd64_movsd_membase_reg (code, ins->inst_basereg, ins->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_regs [quad]);
7080                                                 break;
7081                                         case ArgNone:
7082                                                 break;
7083                                         default:
7084                                                 g_assert_not_reached ();
7085                                         }
7086                                 }
7087                                 break;
7088                         case ArgValuetypeAddrInIReg:
7089                                 if (ainfo->pair_storage [0] == ArgInIReg)
7090                                         amd64_mov_membase_reg (code, ins->inst_left->inst_basereg, ins->inst_left->inst_offset, ainfo->pair_regs [0],  sizeof (gpointer));
7091                                 break;
7092                         default:
7093                                 break;
7094                         }
7095                 } else {
7096                         /* Argument allocated to (non-volatile) register */
7097                         switch (ainfo->storage) {
7098                         case ArgInIReg:
7099                                 amd64_mov_reg_reg (code, ins->dreg, ainfo->reg, 8);
7100                                 break;
7101                         case ArgOnStack:
7102                                 amd64_mov_reg_membase (code, ins->dreg, AMD64_RBP, ARGS_OFFSET + ainfo->offset, 8);
7103                                 break;
7104                         default:
7105                                 g_assert_not_reached ();
7106                         }
7107
7108                         if (ins == cfg->args [0]) {
7109                                 mono_add_var_location (cfg, ins, TRUE, ainfo->reg, 0, 0, code - cfg->native_code);
7110                                 mono_add_var_location (cfg, ins, TRUE, ins->dreg, 0, code - cfg->native_code, 0);
7111                         }
7112                 }
7113         }
7114
7115         if (cfg->method->save_lmf)
7116                 args_clobbered = TRUE;
7117
7118         if (trace) {
7119                 args_clobbered = TRUE;
7120                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
7121         }
7122
7123         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
7124                 args_clobbered = TRUE;
7125
7126         /*
7127          * Optimize the common case of the first bblock making a call with the same
7128          * arguments as the method. This works because the arguments are still in their
7129          * original argument registers.
7130          * FIXME: Generalize this
7131          */
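        /*
         * Illustrative example (not from the source): if the entry bblock starts
         * with a call that passes this method's first argument through unchanged,
         * the OP_LOAD_MEMBASE that reloads it from its stack slot is nullified
         * below, since %rdi still holds the original value at this point.
         */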
7132         if (!args_clobbered) {
7133                 MonoBasicBlock *first_bb = cfg->bb_entry;
7134                 MonoInst *next;
7135                 int filter = FILTER_IL_SEQ_POINT;
7136
7137                 next = mono_bb_first_inst (first_bb, filter);
7138                 if (!next && first_bb->next_bb) {
7139                         first_bb = first_bb->next_bb;
7140                         next = mono_bb_first_inst (first_bb, filter);
7141                 }
7142
7143                 if (first_bb->in_count > 1)
7144                         next = NULL;
7145
7146                 for (i = 0; next && i < sig->param_count + sig->hasthis; ++i) {
7147                         ArgInfo *ainfo = cinfo->args + i;
7148                         gboolean match = FALSE;
7149
7150                         ins = cfg->args [i];
7151                         if (ins->opcode != OP_REGVAR) {
7152                                 switch (ainfo->storage) {
7153                                 case ArgInIReg: {
7154                                         if (((next->opcode == OP_LOAD_MEMBASE) || (next->opcode == OP_LOADI4_MEMBASE)) && next->inst_basereg == ins->inst_basereg && next->inst_offset == ins->inst_offset) {
7155                                                 if (next->dreg == ainfo->reg) {
7156                                                         NULLIFY_INS (next);
7157                                                         match = TRUE;
7158                                                 } else {
7159                                                         next->opcode = OP_MOVE;
7160                                                         next->sreg1 = ainfo->reg;
7161                                                         /* Only continue if the instruction doesn't change argument regs */
7162                                                         if (next->dreg == ainfo->reg || next->dreg == AMD64_RAX)
7163                                                                 match = TRUE;
7164                                                 }
7165                                         }
7166                                         break;
7167                                 }
7168                                 default:
7169                                         break;
7170                                 }
7171                         } else {
7172                                 /* Argument allocated to (non-volatile) register */
7173                                 switch (ainfo->storage) {
7174                                 case ArgInIReg:
7175                                         if (next->opcode == OP_MOVE && next->sreg1 == ins->dreg && next->dreg == ainfo->reg) {
7176                                                 NULLIFY_INS (next);
7177                                                 match = TRUE;
7178                                         }
7179                                         break;
7180                                 default:
7181                                         break;
7182                                 }
7183                         }
7184
7185                         if (match) {
7186                                 next = mono_inst_next (next, filter);
7187                                 //next = mono_inst_list_next (&next->node, &first_bb->ins_list);
7188                                 if (!next)
7189                                         break;
7190                         }
7191                 }
7192         }
7193
7194         if (cfg->gen_sdb_seq_points) {
7195                 MonoInst *info_var = cfg->arch.seq_point_info_var;
7196
7197                 /* Initialize seq_point_info_var */
7198                 if (cfg->compile_aot) {
7199                         /* Initialize the variable from a GOT slot */
7200                         /* Same as OP_AOTCONST */
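                        /* The mov below is emitted with a 0 RIP-relative displacement;
                         * the AOT loader patches it to load from the GOT slot holding
                         * this method's SeqPointInfo. */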
7201                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_SEQ_POINT_INFO, cfg->method);
7202                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, sizeof(gpointer));
7203                         g_assert (info_var->opcode == OP_REGOFFSET);
7204                         amd64_mov_membase_reg (code, info_var->inst_basereg, info_var->inst_offset, AMD64_R11, 8);
7205                 }
7206
7207                 if (cfg->compile_aot) {
7208                         /* Initialize ss_tramp_var */
7209                         ins = cfg->arch.ss_tramp_var;
7210                         g_assert (ins->opcode == OP_REGOFFSET);
7211
7212                         amd64_mov_reg_membase (code, AMD64_R11, info_var->inst_basereg, info_var->inst_offset, 8);
7213                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, MONO_STRUCT_OFFSET (SeqPointInfo, ss_tramp_addr), 8);
7214                         amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
7215                 } else {
7216                         /* Initialize ss_trigger_page_var */
7217                         ins = cfg->arch.ss_trigger_page_var;
7218
7219                         g_assert (ins->opcode == OP_REGOFFSET);
7220
7221                         amd64_mov_reg_imm (code, AMD64_R11, (guint64)ss_trigger_page);
7222                         amd64_mov_membase_reg (code, ins->inst_basereg, ins->inst_offset, AMD64_R11, 8);
7223                 }
7224         }
7225
7226         cfg->code_len = code - cfg->native_code;
7227
7228         g_assert (cfg->code_len < cfg->code_size);
7229
7230         return code;
7231 }
7232
7233 void
7234 mono_arch_emit_epilog (MonoCompile *cfg)
7235 {
7236         MonoMethod *method = cfg->method;
7237         int quad, i;
7238         guint8 *code;
7239         int max_epilog_size;
7240         CallInfo *cinfo;
7241         gint32 lmf_offset = cfg->lmf_var ? ((MonoInst*)cfg->lmf_var)->inst_offset : -1;
7242         gint32 save_area_offset = cfg->arch.reg_save_area_offset;
7243
7244         max_epilog_size = get_max_epilog_size (cfg);
7245
7246         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
7247                 cfg->code_size *= 2;
7248                 cfg->native_code = mono_realloc_native_code (cfg);
7249                 cfg->stat_code_reallocs++;
7250         }
7251         code = cfg->native_code + cfg->code_len;
7252
7253         cfg->has_unwind_info_for_epilog = TRUE;
7254
7255         /* Mark the start of the epilog */
7256         mono_emit_unwind_op_mark_loc (cfg, code, 0);
7257
7258         /* Save the unwind state which is needed by the out-of-line code */
7259         mono_emit_unwind_op_remember_state (cfg, code);
7260
7261         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
7262                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
7263
7264         /* the code restoring the registers must be kept in sync with OP_TAILCALL */
7265         
7266         if (method->save_lmf) {
7267                 /* check if we need to restore protection of the stack after a stack overflow */
7268                 if (!cfg->compile_aot && mono_get_jit_tls_offset () != -1) {
7269                         guint8 *patch;
7270                         code = mono_amd64_emit_tls_get (code, AMD64_RCX, mono_get_jit_tls_offset ());
7271                         /* we load the value in a separate instruction: this mechanism may be
7272                          * used later as a safer way to do thread interruption
7273                          */
7274                         amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RCX, MONO_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 8);
7275                         x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
7276                         patch = code;
7277                         x86_branch8 (code, X86_CC_Z, 0, FALSE);
7278                         /* note that the call trampoline will preserve eax/edx */
7279                         x86_call_reg (code, X86_ECX);
7280                         x86_patch (patch, code);
7281                 } else {
7282                         /* FIXME: maybe save the jit tls in the prolog */
7283                 }
7284                 if (cfg->used_int_regs & (1 << AMD64_RBP)) {
7285                         amd64_mov_reg_membase (code, AMD64_RBP, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, rbp), 8);
7286                 }
7287         }
7288
7289         /* Restore callee saved regs */
7290         for (i = 0; i < AMD64_NREG; ++i) {
7291                 if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->arch.saved_iregs & (1 << i))) {
7292                         /* Restore only used_int_regs, not arch.saved_iregs */
7293                         if (cfg->used_int_regs & (1 << i)) {
7294                                 amd64_mov_reg_membase (code, i, cfg->frame_reg, save_area_offset, 8);
7295                                 mono_emit_unwind_op_same_value (cfg, code, i);
7296                                 async_exc_point (code);
7297                         }
7298                         save_area_offset += 8;
7299                 }
7300         }
7301
7302         /* Load returned vtypes into registers if needed */
7303         cinfo = cfg->arch.cinfo;
7304         if (cinfo->ret.storage == ArgValuetypeInReg) {
7305                 ArgInfo *ainfo = &cinfo->ret;
7306                 MonoInst *inst = cfg->ret;
7307
7308                 for (quad = 0; quad < 2; quad ++) {
7309                         switch (ainfo->pair_storage [quad]) {
7310                         case ArgInIReg:
7311                                 amd64_mov_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)), ainfo->pair_size [quad]);
7312                                 break;
7313                         case ArgInFloatSSEReg:
7314                                 amd64_movss_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)));
7315                                 break;
7316                         case ArgInDoubleSSEReg:
7317                                 amd64_movsd_reg_membase (code, ainfo->pair_regs [quad], inst->inst_basereg, inst->inst_offset + (quad * sizeof(mgreg_t)));
7318                                 break;
7319                         case ArgNone:
7320                                 break;
7321                         default:
7322                                 g_assert_not_reached ();
7323                         }
7324                 }
7325         }
7326
7327         if (cfg->arch.omit_fp) {
7328                 if (cfg->arch.stack_alloc_size) {
7329                         amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
7330                 }
7331         } else {
7332                 amd64_leave (code);
7333                 mono_emit_unwind_op_same_value (cfg, code, AMD64_RBP);
7334         }
7335         mono_emit_unwind_op_def_cfa (cfg, code, AMD64_RSP, 8);
7336         async_exc_point (code);
7337         amd64_ret (code);
7338
7339         /* Restore the unwind state to be the same as before the epilog */
7340         mono_emit_unwind_op_restore_state (cfg, code);
7341
7342         cfg->code_len = code - cfg->native_code;
7343
7344         g_assert (cfg->code_len < cfg->code_size);
7345 }
7346
7347 void
7348 mono_arch_emit_exceptions (MonoCompile *cfg)
7349 {
7350         MonoJumpInfo *patch_info;
7351         int nthrows, i;
7352         guint8 *code;
7353         MonoClass *exc_classes [16];
7354         guint8 *exc_throw_start [16], *exc_throw_end [16];
7355         guint32 code_size = 0;
7356
7357         /* Compute needed space */
7358         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
7359                 if (patch_info->type == MONO_PATCH_INFO_EXC)
7360                         code_size += 40;
7361                 if (patch_info->type == MONO_PATCH_INFO_R8)
7362                         code_size += 8 + 15; /* sizeof (double) + alignment */
7363                 if (patch_info->type == MONO_PATCH_INFO_R4)
7364                         code_size += 4 + 15; /* sizeof (float) + alignment */
7365                 if (patch_info->type == MONO_PATCH_INFO_GC_CARD_TABLE_ADDR)
7366                         code_size += 8 + 7; /* sizeof (void*) + alignment */
7367         }
7368
7369 #ifdef __native_client_codegen__
7370         /* Give us extra room on Native Client.  This could be   */
7371         /* more carefully calculated, but bundle alignment makes */
7372         /* it much trickier, so *2 like other places is good.    */
7373         code_size *= 2;
7374 #endif
7375
7376         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
7377                 cfg->code_size *= 2;
7378                 cfg->native_code = mono_realloc_native_code (cfg);
7379                 cfg->stat_code_reallocs++;
7380         }
7381
7382         code = cfg->native_code + cfg->code_len;
7383
7384         /* add code to raise exceptions */
7385         nthrows = 0;
7386         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
7387                 switch (patch_info->type) {
7388                 case MONO_PATCH_INFO_EXC: {
7389                         MonoClass *exc_class;
7390                         guint8 *buf, *buf2;
7391                         guint32 throw_ip;
7392
7393                         amd64_patch (patch_info->ip.i + cfg->native_code, code);
7394
7395                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
7396                         g_assert (exc_class);
7397                         throw_ip = patch_info->ip.i;
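                        /* ARG_REG2 is loaded with the distance from the throw site to
                         * the return address of the call emitted below; the corlib
                         * exception trampoline subtracts it to recover the original
                         * throw location. */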
7398
7399                         //x86_breakpoint (code);
7400                         /* Find a throw sequence for the same exception class */
7401                         for (i = 0; i < nthrows; ++i)
7402                                 if (exc_classes [i] == exc_class)
7403                                         break;
7404                         if (i < nthrows) {
7405                                 amd64_mov_reg_imm (code, AMD64_ARG_REG2, (exc_throw_end [i] - cfg->native_code) - throw_ip);
7406                                 x86_jump_code (code, exc_throw_start [i]);
7407                                 patch_info->type = MONO_PATCH_INFO_NONE;
7408                         }
7409                         else {
7410                                 buf = code;
7411                                 amd64_mov_reg_imm_size (code, AMD64_ARG_REG2, 0xf0f0f0f0, 4);
7412                                 buf2 = code;
7413
7414                                 if (nthrows < 16) {
7415                                         exc_classes [nthrows] = exc_class;
7416                                         exc_throw_start [nthrows] = code;
7417                                 }
7418                                 amd64_mov_reg_imm (code, AMD64_ARG_REG1, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
7419
7420                                 patch_info->type = MONO_PATCH_INFO_NONE;
7421
7422                                 code = emit_call_body (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, "mono_arch_throw_corlib_exception");
7423
7424                                 amd64_mov_reg_imm (buf, AMD64_ARG_REG2, (code - cfg->native_code) - throw_ip);
7425                                 while (buf < buf2)
7426                                         x86_nop (buf);
7427
7428                                 if (nthrows < 16) {
7429                                         exc_throw_end [nthrows] = code;
7430                                         nthrows ++;
7431                                 }
7432                         }
7433                         break;
7434                 }
7435                 default:
7436                         /* do nothing */
7437                         break;
7438                 }
7439                 g_assert(code < cfg->native_code + cfg->code_size);
7440         }
7441
7442         /* Handle relocations with RIP relative addressing */
7443         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
7444                 gboolean remove = FALSE;
7445                 guint8 *orig_code = code;
7446
7447                 switch (patch_info->type) {
7448                 case MONO_PATCH_INFO_R8:
7449                 case MONO_PATCH_INFO_R4: {
7450                         guint8 *pos, *patch_pos;
7451                         guint32 target_pos;
7452
7453                         /* The SSE opcodes require a 16 byte alignment */
7454 #if defined(__default_codegen__)
7455                         code = (guint8*)ALIGN_TO (code, 16);
7456 #elif defined(__native_client_codegen__)
7457                         {
7458                                 /* Pad this out with HLT instructions  */
7459                                 /* or we can get garbage bytes emitted */
7460                                 /* which will fail validation          */
7461                                 guint8 *aligned_code;
7462                                 /* extra align to make room for  */
7463                                 /* mov/push below                      */
7464                                 int extra_align = patch_info->type == MONO_PATCH_INFO_R8 ? 2 : 1;
7465                                 aligned_code = (guint8*)ALIGN_TO (code + extra_align, 16);
7466                                 /* The technique of hiding data in an  */
7467                                 /* instruction has a problem here: we  */
7468                                 /* need the data aligned to a 16-byte  */
7469                                 /* boundary but the instruction cannot */
7470                                 /* cross the bundle boundary. so only  */
7471                                 /* odd multiples of 16 can be used     */
7472                                 if ((intptr_t)aligned_code % kNaClAlignment == 0) {
7473                                         aligned_code += 16;
7474                                 }
7475                                 while (code < aligned_code) {
7476                                         *(code++) = 0xf4; /* hlt */
7477                                 }
7478                         }       
7479 #endif
7480
7481                         pos = cfg->native_code + patch_info->ip.i;
7482                         if (IS_REX (pos [1])) {
7483                                 patch_pos = pos + 5;
7484                                 target_pos = code - pos - 9;
7485                         }
7486                         else {
7487                                 patch_pos = pos + 4;
7488                                 target_pos = code - pos - 8;
7489                         }
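                        /* target_pos becomes the RIP-relative disp32: the CPU adds it
                         * to the address of the *next* instruction, hence the -8 (or
                         * -9 with a REX prefix) adjustment above. */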
7490
7491                         if (patch_info->type == MONO_PATCH_INFO_R8) {
7492 #ifdef __native_client_codegen__
7493                                 /* Hide 64-bit data in a         */
7494                                 /* "mov imm64, r11" instruction. */
7495                                 /* write it before the start of  */
7496                                 /* the data*/
7497                                 *(code-2) = 0x49; /* prefix      */
7498                                 *(code-1) = 0xbb; /* mov X, %r11 */
7499 #endif
7500                                 *(double*)code = *(double*)patch_info->data.target;
7501                                 code += sizeof (double);
7502                         } else {
7503 #ifdef __native_client_codegen__
7504                                 /* Hide 32-bit data in a        */
7505                                 /* "push imm32" instruction.    */
7506                                 *(code-1) = 0x68; /* push */
7507 #endif
7508                                 *(float*)code = *(float*)patch_info->data.target;
7509                                 code += sizeof (float);
7510                         }
7511
7512                         *(guint32*)(patch_pos) = target_pos;
7513
7514                         remove = TRUE;
7515                         break;
7516                 }
7517                 case MONO_PATCH_INFO_GC_CARD_TABLE_ADDR: {
7518                         guint8 *pos;
7519
7520                         if (cfg->compile_aot)
7521                                 continue;
7522
7523                         /* Loading is faster from aligned addresses. */
7524                         code = (guint8*)ALIGN_TO (code, 8);
7525                         memset (orig_code, 0, code - orig_code);
7526
7527                         pos = cfg->native_code + patch_info->ip.i;
7528
7529                         /* alu_op [rex] modr/m imm32 - 7 or 8 bytes */
7530                         if (IS_REX (pos [1]))
7531                                 *(guint32*)(pos + 4) = (guint8*)code - pos - 8;
7532                         else
7533                                 *(guint32*)(pos + 3) = (guint8*)code - pos - 7;
7534
7535                         *(gpointer*)code = (gpointer)patch_info->data.target;
7536                         code += sizeof (gpointer);
7537
7538                         remove = TRUE;
7539                         break;
7540                 }
7541                 default:
7542                         break;
7543                 }
7544
7545                 if (remove) {
7546                         if (patch_info == cfg->patch_info)
7547                                 cfg->patch_info = patch_info->next;
7548                         else {
7549                                 MonoJumpInfo *tmp;
7550
7551                                 for (tmp = cfg->patch_info; tmp->next != patch_info; tmp = tmp->next)
7552                                         ;
7553                                 tmp->next = patch_info->next;
7554                         }
7555                 }
7556                 g_assert (code < cfg->native_code + cfg->code_size);
7557         }
7558
7559         cfg->code_len = code - cfg->native_code;
7560
7561         g_assert (cfg->code_len < cfg->code_size);
7562
7563 }
7564
7565 #endif /* DISABLE_JIT */
7566
7567 void*
7568 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
7569 {
7570         guchar *code = p;
7571         MonoMethodSignature *sig;
7572         MonoInst *inst;
7573         int i, n, stack_area = 0;
7574
7575         /* Keep this in sync with mono_arch_get_argument_info */
7576
7577         if (enable_arguments) {
7578                 /* Allocate a new area on the stack and save arguments there */
7579                 sig = mono_method_signature (cfg->method);
7580
7581                 n = sig->param_count + sig->hasthis;
7582
7583                 stack_area = ALIGN_TO (n * 8, 16);
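                /* Align to 16 bytes so %rsp stays correctly aligned for the call below */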
7584
7585                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);
7586
7587                 for (i = 0; i < n; ++i) {
7588                         inst = cfg->args [i];
7589
7590                         if (inst->opcode == OP_REGVAR)
7591                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
7592                         else {
7593                                 amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
7594                                 amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
7595                         }
7596                 }
7597         }
7598
7599         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
7600         amd64_set_reg_template (code, AMD64_ARG_REG1);
7601         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RSP, 8);
7602         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
7603
7604         if (enable_arguments)
7605                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);
7606
7607         return code;
7608 }
7609
7610 enum {
7611         SAVE_NONE,
7612         SAVE_STRUCT,
7613         SAVE_EAX,
7614         SAVE_EAX_EDX,
7615         SAVE_XMM
7616 };
7617
7618 void*
7619 mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers)
7620 {
7621         guchar *code = p;
7622         int save_mode = SAVE_NONE;
7623         MonoMethod *method = cfg->method;
7624         MonoType *ret_type = mini_get_underlying_type (mono_method_signature (method)->ret);
7625         int i;
7626         
7627         switch (ret_type->type) {
7628         case MONO_TYPE_VOID:
7629                 /* special case string .ctor icall */
7630                 if (!strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
7631                         save_mode = SAVE_EAX;
7632                 else
7633                         save_mode = SAVE_NONE;
7634                 break;
7635         case MONO_TYPE_I8:
7636         case MONO_TYPE_U8:
7637                 save_mode = SAVE_EAX;
7638                 break;
7639         case MONO_TYPE_R4:
7640         case MONO_TYPE_R8:
7641                 save_mode = SAVE_XMM;
7642                 break;
7643         case MONO_TYPE_GENERICINST:
7644                 if (!mono_type_generic_inst_is_valuetype (ret_type)) {
7645                         save_mode = SAVE_EAX;
7646                         break;
7647                 }
7648                 /* Fall through */
7649         case MONO_TYPE_VALUETYPE:
7650                 save_mode = SAVE_STRUCT;
7651                 break;
7652         default:
7653                 save_mode = SAVE_EAX;
7654                 break;
7655         }
7656
7657         /* Save the result and copy it into the proper argument register */
7658         switch (save_mode) {
7659         case SAVE_EAX:
7660                 amd64_push_reg (code, AMD64_RAX);
7661                 /* Align stack */
7662                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
7663                 if (enable_arguments)
7664                         amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RAX, 8);
7665                 break;
7666         case SAVE_STRUCT:
7667                 /* FIXME: */
7668                 if (enable_arguments)
7669                         amd64_mov_reg_imm (code, AMD64_ARG_REG2, 0);
7670                 break;
7671         case SAVE_XMM:
7672                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
7673                 amd64_movsd_membase_reg (code, AMD64_RSP, 0, AMD64_XMM0);
7674                 /* Align stack */
7675                 amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
7676                 /* 
7677                  * The result is already in the proper argument register so no copying
7678                  * needed.
7679                  */
7680                 break;
7681         case SAVE_NONE:
7682                 break;
7683         default:
7684                 g_assert_not_reached ();
7685         }
7686
7687         /* Set %al to the number of SSE registers used, since this is a varargs call (SysV ABI) */
7688         if (save_mode == SAVE_XMM)
7689                 amd64_mov_reg_imm (code, AMD64_RAX, 1);
7690         else
7691                 amd64_mov_reg_imm (code, AMD64_RAX, 0);
7692
7693         if (preserve_argument_registers) {
7694                 for (i = 0; i < PARAM_REGS; ++i)
7695                         amd64_push_reg (code, param_regs [i]);
7696         }
7697
7698         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
7699         amd64_set_reg_template (code, AMD64_ARG_REG1);
7700         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);
7701
7702         if (preserve_argument_registers) {
7703                 for (i = PARAM_REGS - 1; i >= 0; --i)
7704                         amd64_pop_reg (code, param_regs [i]);
7705         }
7706
7707         /* Restore result */
7708         switch (save_mode) {
7709         case SAVE_EAX:
7710                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
7711                 amd64_pop_reg (code, AMD64_RAX);
7712                 break;
7713         case SAVE_STRUCT:
7714                 /* FIXME: */
7715                 break;
7716         case SAVE_XMM:
7717                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
7718                 amd64_movsd_reg_membase (code, AMD64_XMM0, AMD64_RSP, 0);
7719                 amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
7720                 break;
7721         case SAVE_NONE:
7722                 break;
7723         default:
7724                 g_assert_not_reached ();
7725         }
7726
7727         return code;
7728 }
7729
7730 void
7731 mono_arch_flush_icache (guint8 *code, gint size)
7732 {
7733         /* Not needed */
7734 }
7735
7736 void
7737 mono_arch_flush_register_windows (void)
7738 {
7739 }
7740
7741 gboolean 
7742 mono_arch_is_inst_imm (gint64 imm)
7743 {
7744         return amd64_use_imm32 (imm);
7745 }
7746
7747 /*
7748  * Determine whether the trap whose info is in SIGINFO was caused by
7749  * integer overflow.
7750  */
7751 gboolean
7752 mono_arch_is_int_overflow (void *sigctx, void *info)
7753 {
7754         MonoContext ctx;
7755         guint8* rip;
7756         int reg;
7757         gint64 value;
7758
7759         mono_sigctx_to_monoctx (sigctx, &ctx);
7760
7761         rip = (guint8*)ctx.gregs [AMD64_RIP];
7762
7763         if (IS_REX (rip [0])) {
7764                 reg = amd64_rex_b (rip [0]);
7765                 rip ++;
7766         }
7767         else
7768                 reg = 0;
7769
7770         if ((rip [0] == 0xf7) && (x86_modrm_mod (rip [1]) == 0x3) && (x86_modrm_reg (rip [1]) == 0x7)) {
7771                 /* idiv REG */
7772                 reg += x86_modrm_rm (rip [1]);
7773
7774                 value = ctx.gregs [reg];
7775
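                /* idiv faults both on division by zero and on overflow
                 * (INT64_MIN / -1); a divisor of -1 identifies the overflow case. */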
7776                 if (value == -1)
7777                         return TRUE;
7778         }
7779
7780         return FALSE;
7781 }
7782
7783 guint32
7784 mono_arch_get_patch_offset (guint8 *code)
7785 {
7786         return 3;
7787 }
7788
7789 /**
7790  * mono_breakpoint_clean_code:
7791  *
7792  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
7793  * breakpoints in the original code, they are removed in the copy.
7794  *
7795  * Returns TRUE if no sw breakpoint was present.
7796  */
7797 gboolean
7798 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
7799 {
7800         /*
7801          * If method_start is non-NULL we need to perform bounds checks: reading memory
7802          * at code - offset could take us before the start of the method, into a
7803          * different page that might not be mapped, or make us read incorrect data.
7804          * We zero-fill those leading bytes instead.
7805          */
7806         if (!method_start || code - offset >= method_start) {
7807                 memcpy (buf, code - offset, size);
7808         } else {
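                /* code - offset falls before method_start: zero-fill the leading
                 * bytes and copy only the part that lies inside the method. */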
7809                 int diff = code - method_start;
7810                 memset (buf, 0, size);
7811                 memcpy (buf + offset - diff, method_start, diff + size - offset);
7812         }
7813         return TRUE;
7814 }
7815
7816 #if defined(__native_client_codegen__)
7817 /* For membase calls, we want the base register. For Native Client,  */
7818 /* all indirect calls have the following sequence with the given sizes: */
7819 /* mov %eXX,%eXX                                [2-3]   */
7820 /* mov disp(%r15,%rXX,scale),%r11d              [4-8]   */
7821 /* and $0xffffffffffffffe0,%r11d                [4]     */
7822 /* add %r15,%r11                                [3]     */
7823 /* callq *%r11                                  [3]     */
7824
7825
7826 /* Determine if code points to a NaCl call-through-register sequence, */
7827 /* (i.e., the last 3 instructions listed above) */
7828 int
7829 is_nacl_call_reg_sequence(guint8* code)
7830 {
7831         const char *sequence = "\x41\x83\xe3\xe0" /* and */
7832                                "\x4d\x03\xdf"     /* add */
7833                                "\x41\xff\xd3";   /* call */
7834         return memcmp(code, sequence, 10) == 0;
7835 }
7836
7837 /* Determine if code points to the first opcode of the mov membase component */
7838 /* of an indirect call sequence (i.e. the first 2 instructions listed above) */
7839 /* (there could be a REX prefix before the opcode but it is ignored) */
7840 static int
7841 is_nacl_indirect_call_membase_sequence(guint8* code)
7842 {
7843                /* Check for mov opcode, reg-reg addressing mode (mod = 3), */
7844         return code[0] == 0x8b && amd64_modrm_mod(code[1]) == 3 &&
7845                /* and that src reg = dest reg */
7846                amd64_modrm_reg(code[1]) == amd64_modrm_rm(code[1]) &&
7847                /* Check that next inst is mov, uses SIB byte (rm = 4), */
7848                IS_REX(code[2]) &&
7849                code[3] == 0x8b && amd64_modrm_rm(code[4]) == 4 &&
7850                /* and has dst of r11 and base of r15 */
7851                (amd64_modrm_reg(code[4]) + amd64_rex_r(code[2])) == AMD64_R11 &&
7852                (amd64_sib_base(code[5]) + amd64_rex_b(code[2])) == AMD64_R15;
7853 }
7854 #endif /* __native_client_codegen__ */
7855
7856 int
7857 mono_arch_get_this_arg_reg (guint8 *code)
7858 {
7859         return AMD64_ARG_REG1;
7860 }
7861
7862 gpointer
7863 mono_arch_get_this_arg_from_call (mgreg_t *regs, guint8 *code)
7864 {
7865         return (gpointer)regs [mono_arch_get_this_arg_reg (code)];
7866 }
7867
7868 #define MAX_ARCH_DELEGATE_PARAMS 10
7869
7870 static gpointer
7871 get_delegate_invoke_impl (MonoTrampInfo **info, gboolean has_target, guint32 param_count)
7872 {
7873         guint8 *code, *start;
7874         GSList *unwind_ops = NULL;
7875         int i;
7876
7877         unwind_ops = mono_arch_get_cie_program ();
7878
7879         if (has_target) {
7880                 start = code = mono_global_codeman_reserve (64);
7881
7882                 /* Replace the this argument with the target */
7883                 amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7884                 amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, target), 8);
7885                 amd64_jump_membase (code, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
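                /* Emitted sequence (roughly; struct offsets elided):
                 *   mov %rdi, %rax            ; save the delegate
                 *   mov target(%rax), %rdi    ; replace 'this' with delegate->target
                 *   jmp *method_ptr(%rax)     ; tail call delegate->method_ptr
                 */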
7886
7887                 g_assert ((code - start) < 64);
7888         } else {
7889                 start = code = mono_global_codeman_reserve (64);
7890
7891                 if (param_count == 0) {
7892                         amd64_jump_membase (code, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
7893                 } else {
7894                         /* We have to shift the arguments left */
7895                         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7896                         for (i = 0; i < param_count; ++i) {
7897 #ifdef TARGET_WIN32
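                                /* On Win64 only 4 argument registers exist; the 5th
                                 * argument lives at [rsp + 0x28] (return address plus
                                 * 0x20 bytes of shadow space). */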
7898                                 if (i < 3)
7899                                         amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
7900                                 else
7901                                         amd64_mov_reg_membase (code, param_regs [i], AMD64_RSP, 0x28, 8);
7902 #else
7903                                 amd64_mov_reg_reg (code, param_regs [i], param_regs [i + 1], 8);
7904 #endif
7905                         }
7906
7907                         amd64_jump_membase (code, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method_ptr));
7908                 }
7909                 g_assert ((code - start) < 64);
7910         }
7911
7912         nacl_global_codeman_validate (&start, 64, &code);
7913         mono_arch_flush_icache (start, code - start);
7914
7915         if (has_target) {
7916                 *info = mono_tramp_info_create ("delegate_invoke_impl_has_target", start, code - start, NULL, unwind_ops);
7917         } else {
7918                 char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", param_count);
7919                 *info = mono_tramp_info_create (name, start, code - start, NULL, unwind_ops);
7920                 g_free (name);
7921         }
7922
7923         if (mono_jit_map_is_enabled ()) {
7924                 char *buff;
7925                 if (has_target)
7926                         buff = (char*)"delegate_invoke_has_target";
7927                 else
7928                         buff = g_strdup_printf ("delegate_invoke_no_target_%d", param_count);
7929                 mono_emit_jit_tramp (start, code - start, buff);
7930                 if (!has_target)
7931                         g_free (buff);
7932         }
7933         mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
7934
7935         return start;
7936 }
7937
7938 #define MAX_VIRTUAL_DELEGATE_OFFSET 32
7939
7940 static gpointer
7941 get_delegate_virtual_invoke_impl (MonoTrampInfo **info, gboolean load_imt_reg, int offset)
7942 {
7943         guint8 *code, *start;
7944         int size = 20;
7945         char *tramp_name;
7946         GSList *unwind_ops;
7947
7948         if (offset / (int)sizeof (gpointer) > MAX_VIRTUAL_DELEGATE_OFFSET)
7949                 return NULL;
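        /* Negative offsets are IMT slots, which Mono stores just below the
         * vtable in memory, so the jump emitted below indexes backwards from
         * the vtable pointer. */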
7950
7951         start = code = mono_global_codeman_reserve (size);
7952
7953         unwind_ops = mono_arch_get_cie_program ();
7954
7955         /* Replace the this argument with the target */
7956         amd64_mov_reg_reg (code, AMD64_RAX, AMD64_ARG_REG1, 8);
7957         amd64_mov_reg_membase (code, AMD64_ARG_REG1, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, target), 8);
7958
7959         if (load_imt_reg) {
7960                 /* Load the IMT reg */
7961                 amd64_mov_reg_membase (code, MONO_ARCH_IMT_REG, AMD64_RAX, MONO_STRUCT_OFFSET (MonoDelegate, method), 8);
7962         }
7963
7964         /* Load the vtable */
7965         amd64_mov_reg_membase (code, AMD64_RAX, AMD64_ARG_REG1, MONO_STRUCT_OFFSET (MonoObject, vtable), 8);
7966         amd64_jump_membase (code, AMD64_RAX, offset);
7967         mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_DELEGATE_INVOKE, NULL);
7968
7969         if (load_imt_reg)
7970                 tramp_name = g_strdup_printf ("delegate_virtual_invoke_imt_%d", - offset / sizeof (gpointer));
7971         else
7972                 tramp_name = g_strdup_printf ("delegate_virtual_invoke_%d", offset / sizeof (gpointer));
7973         *info = mono_tramp_info_create (tramp_name, start, code - start, NULL, unwind_ops);
7974         g_free (tramp_name);
7975
7976         return start;
7977 }
7978
7979 /*
7980  * mono_arch_get_delegate_invoke_impls:
7981  *
7982  *   Return a list of MonoTrampInfo structures for the delegate invoke impl
7983  * trampolines.
7984  */
7985 GSList*
7986 mono_arch_get_delegate_invoke_impls (void)
7987 {
7988         GSList *res = NULL;
7989         MonoTrampInfo *info;
7990         int i;
7991
7992         get_delegate_invoke_impl (&info, TRUE, 0);
7993         res = g_slist_prepend (res, info);
7994
7995         for (i = 0; i <= MAX_ARCH_DELEGATE_PARAMS; ++i) {
7996                 get_delegate_invoke_impl (&info, FALSE, i);
7997                 res = g_slist_prepend (res, info);
7998         }
7999
8000         for (i = 0; i <= MAX_VIRTUAL_DELEGATE_OFFSET; ++i) {
8001                 get_delegate_virtual_invoke_impl (&info, TRUE, - i * SIZEOF_VOID_P);
8002                 res = g_slist_prepend (res, info);
8003
8004                 get_delegate_virtual_invoke_impl (&info, FALSE, i * SIZEOF_VOID_P);
8005                 res = g_slist_prepend (res, info);
8006         }
8007
8008         return res;
8009 }
8010
8011 gpointer
8012 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
8013 {
8014         guint8 *code, *start;
8015         int i;
8016
8017         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
8018                 return NULL;
8019
8020         /* FIXME: Support more cases */
8021         if (MONO_TYPE_ISSTRUCT (mini_get_underlying_type (sig->ret)))
8022                 return NULL;
8023
8024         if (has_target) {
8025                 static guint8* cached = NULL;
8026
8027                 if (cached)
8028                         return cached;
8029
8030                 if (mono_aot_only) {
8031                         start = mono_aot_get_trampoline ("delegate_invoke_impl_has_target");
8032                 } else {
8033                         MonoTrampInfo *info;
8034                         start = get_delegate_invoke_impl (&info, TRUE, 0);
8035                         mono_tramp_info_register (info, NULL);
8036                 }
8037
8038                 mono_memory_barrier ();
8039
8040                 cached = start;
8041         } else {
8042                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
8043                 for (i = 0; i < sig->param_count; ++i)
8044                         if (!mono_is_regsize_var (sig->params [i]))
8045                                 return NULL;
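                /* The no-target stub shifts every argument down one register
                 * (see get_delegate_invoke_impl), so presumably only a few
                 * parameters can be supported; 4 also keeps the stub valid on
                 * Win64, which has only 4 argument registers. */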
8046                 if (sig->param_count > 4)
8047                         return NULL;
8048
8049                 code = cache [sig->param_count];
8050                 if (code)
8051                         return code;
8052
8053                 if (mono_aot_only) {
8054                         char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count);
8055                         start = mono_aot_get_trampoline (name);
8056                         g_free (name);
8057                 } else {
8058                         MonoTrampInfo *info;
8059                         start = get_delegate_invoke_impl (&info, FALSE, sig->param_count);
8060                         mono_tramp_info_register (info, NULL);
8061                 }
8062
8063                 mono_memory_barrier ();
8064
8065                 cache [sig->param_count] = start;
8066         }
8067
8068         return start;
8069 }
8070
8071 gpointer
8072 mono_arch_get_delegate_virtual_invoke_impl (MonoMethodSignature *sig, MonoMethod *method, int offset, gboolean load_imt_reg)
8073 {
8074         MonoTrampInfo *info;
8075         gpointer code;
8076
8077         code = get_delegate_virtual_invoke_impl (&info, load_imt_reg, offset);
8078         if (code)
8079                 mono_tramp_info_register (info, NULL);
8080         return code;
8081 }
8082
8083 void
8084 mono_arch_finish_init (void)
8085 {
8086 #if !defined(HOST_WIN32) && defined(MONO_XEN_OPT)
8087         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
8088 #endif
8089 }
8090
8091 void
8092 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
8093 {
8094 }
8095
8096 #if defined(__default_codegen__)
8097 #define CMP_SIZE (6 + 1)
8098 #define CMP_REG_REG_SIZE (4 + 1)
8099 #define BR_SMALL_SIZE 2
8100 #define BR_LARGE_SIZE 6
8101 #define MOV_REG_IMM_SIZE 10
8102 #define MOV_REG_IMM_32BIT_SIZE 6
8103 #define JUMP_REG_SIZE (2 + 1)
8104 #elif defined(__native_client_codegen__)
8105 /* NaCl N-byte instructions can be padded up to N-1 bytes */
8106 #define CMP_SIZE ((6 + 1) * 2 - 1)
8107 #define CMP_REG_REG_SIZE ((4 + 1) * 2 - 1)
8108 #define BR_SMALL_SIZE (2 * 2 - 1)
8109 #define BR_LARGE_SIZE (6 * 2 - 1)
8110 #define MOV_REG_IMM_SIZE (10 * 2 - 1)
8111 #define MOV_REG_IMM_32BIT_SIZE (6 * 2 - 1)
8112 /* Jump reg for NaCl adds a mask (+4) and add (+3) */
8113 #define JUMP_REG_SIZE ((2 + 1 + 4 + 3) * 2 - 1)
8114 /* Jump membase's size is large and unpredictable in */
8115 /* Native Client; just pad it out to a whole bundle.  */
8116 #define JUMP_MEMBASE_SIZE (kNaClAlignment)
8117 #endif
8118
8119 static int
8120 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
8121 {
8122         int i, distance = 0;
8123         for (i = start; i < target; ++i)
8124                 distance += imt_entries [i]->chunk_size;
8125         return distance;
8126 }
8127
8128 /*
8129  * LOCKING: called with the domain lock held
8130  */
8131 gpointer
8132 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
8133         gpointer fail_tramp)
8134 {
8135         int i;
8136         int size = 0;
8137         guint8 *code, *start;
8138         gboolean vtable_is_32bit = ((gsize)(vtable) == (gsize)(int)(gsize)(vtable));
8139         GSList *unwind_ops;
8140
8141         for (i = 0; i < count; ++i) {
8142                 MonoIMTCheckItem *item = imt_entries [i];
8143                 if (item->is_equals) {
8144                         if (item->check_target_idx) {
8145                                 if (!item->compare_done) {
8146                                         if (amd64_use_imm32 ((gint64)item->key))
8147                                                 item->chunk_size += CMP_SIZE;
8148                                         else
8149                                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
8150                                 }
8151                                 if (item->has_target_code) {
8152                                         item->chunk_size += MOV_REG_IMM_SIZE;
8153                                 } else {
8154                                         if (vtable_is_32bit)
8155                                                 item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
8156                                         else
8157                                                 item->chunk_size += MOV_REG_IMM_SIZE;
8158 #ifdef __native_client_codegen__
8159                                         item->chunk_size += JUMP_MEMBASE_SIZE;
8160 #endif
8161                                 }
8162                                 item->chunk_size += BR_SMALL_SIZE + JUMP_REG_SIZE;
8163                         } else {
8164                                 if (fail_tramp) {
8165                                         item->chunk_size += MOV_REG_IMM_SIZE * 3 + CMP_REG_REG_SIZE +
8166                                                 BR_SMALL_SIZE + JUMP_REG_SIZE * 2;
8167                                 } else {
8168                                         if (vtable_is_32bit)
8169                                                 item->chunk_size += MOV_REG_IMM_32BIT_SIZE;
8170                                         else
8171                                                 item->chunk_size += MOV_REG_IMM_SIZE;
8172                                         item->chunk_size += JUMP_REG_SIZE;
8173                                         /* with assert below:
8174                                          * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
8175                                          */
8176 #ifdef __native_client_codegen__
8177                                         item->chunk_size += JUMP_MEMBASE_SIZE;
8178 #endif
8179                                 }
8180                         }
8181                 } else {
8182                         if (amd64_use_imm32 ((gint64)item->key))
8183                                 item->chunk_size += CMP_SIZE;
8184                         else
8185                                 item->chunk_size += MOV_REG_IMM_SIZE + CMP_REG_REG_SIZE;
8186                         item->chunk_size += BR_LARGE_SIZE;
8187                         imt_entries [item->check_target_idx]->compare_done = TRUE;
8188                 }
8189                 size += item->chunk_size;
8190         }
8191 #if defined(__native_client__) && defined(__native_client_codegen__)
8192         /* In Native Client we don't re-use thunks; allocate from the */
8193         /* normal code manager paths. */
8194         code = mono_domain_code_reserve (domain, size);
8195 #else
8196         if (fail_tramp)
8197                 code = mono_method_alloc_generic_virtual_thunk (domain, size);
8198         else
8199                 code = mono_domain_code_reserve (domain, size);
8200 #endif
8201         start = code;
8202
8203         unwind_ops = mono_arch_get_cie_program ();
8204
8205         for (i = 0; i < count; ++i) {
8206                 MonoIMTCheckItem *item = imt_entries [i];
8207                 item->code_target = code;
8208                 if (item->is_equals) {
8209                         gboolean fail_case = !item->check_target_idx && fail_tramp;
8210
8211                         if (item->check_target_idx || fail_case) {
8212                                 if (!item->compare_done || fail_case) {
8213                                         if (amd64_use_imm32 ((gint64)item->key))
8214                                                 amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof(gpointer));
8215                                         else {
8216                                                 amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_SCRATCH_REG, item->key, sizeof(gpointer));
8217                                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
8218                                         }
8219                                 }
8220                                 item->jmp_code = code;
8221                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
8222                                 if (item->has_target_code) {
8223                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->value.target_code);
8224                                         amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG);
8225                                 } else {
8226                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
8227                                         amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
8228                                 }
8229
8230                                 if (fail_case) {
8231                                         amd64_patch (item->jmp_code, code);
8232                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, fail_tramp);
8233                                         amd64_jump_reg (code, MONO_ARCH_IMT_SCRATCH_REG);
8234                                         item->jmp_code = NULL;
8235                                 }
8236                         } else {
8237                                 /* Enable the commented-out code below to assert when the wrong method is reached */
8238 #if 0
8239                                 if (amd64_is_imm32 (item->key))
8240                                         amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof(gpointer));
8241                                 else {
8242                                         amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, item->key);
8243                                         amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
8244                                 }
8245                                 item->jmp_code = code;
8246                                 amd64_branch8 (code, X86_CC_NE, 0, FALSE);
8247                                 /* See the comment below about R10 */
8248                                 amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
8249                                 amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
8250                                 amd64_patch (item->jmp_code, code);
8251                                 amd64_breakpoint (code);
8252                                 item->jmp_code = NULL;
8253 #else
8254                                 /* We use R10 (MONO_ARCH_IMT_SCRATCH_REG) here because
8255                                  * R11 (MONO_ARCH_IMT_REG) needs to be preserved.
8256                                  * R10 only needs to be preserved for calls which
8257                                  * require a runtime generic context, but interface
8258                                  * calls don't. */
8259                                 amd64_mov_reg_imm (code, MONO_ARCH_IMT_SCRATCH_REG, & (vtable->vtable [item->value.vtable_slot]));
8260                                 amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
8261 #endif
8262                         }
8263                 } else {
8264                         if (amd64_use_imm32 ((gint64)item->key))
8265                                 amd64_alu_reg_imm_size (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)(gssize)item->key, sizeof (gpointer));
8266                         else {
8267                                 amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_SCRATCH_REG, item->key, sizeof (gpointer));
8268                                 amd64_alu_reg_reg (code, X86_CMP, MONO_ARCH_IMT_REG, MONO_ARCH_IMT_SCRATCH_REG);
8269                         }
8270                         item->jmp_code = code;
8271                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
8272                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
8273                         else
8274                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
8275                 }
8276                 g_assert (code - item->code_target <= item->chunk_size);
8277         }
8278         /* patch the branches to get to the target items */
8279         for (i = 0; i < count; ++i) {
8280                 MonoIMTCheckItem *item = imt_entries [i];
8281                 if (item->jmp_code) {
8282                         if (item->check_target_idx) {
8283                                 amd64_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
8284                         }
8285                 }
8286         }
8287
8288         if (!fail_tramp)
8289                 mono_stats.imt_thunks_size += code - start;
8290         g_assert (code - start <= size);
8291
8292         nacl_domain_code_validate(domain, &start, size, &code);
8293         mono_profiler_code_buffer_new (start, code - start, MONO_PROFILER_CODE_BUFFER_IMT_TRAMPOLINE, NULL);
8294
8295         mono_tramp_info_register (mono_tramp_info_create (NULL, start, code - start, NULL, unwind_ops), domain);
8296
8297         return start;
8298 }
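/*
 * Roughly, each generated compare chunk looks like the following
 * (pseudo-assembly sketch, destination-first like the amd64_* macros;
 * MONO_ARCH_IMT_REG == %r11, MONO_ARCH_IMT_SCRATCH_REG == %r10):
 *
 *       cmp  %r11, key          ; is this the interface method we want?
 *       jne  next               ; no, fall through to the next entry
 *       mov  %r10, &vtable_slot ; yes, load the address of the vtable slot
 *       jmp  [%r10]             ; and tail-call through it
 *   next:
 *       ...
 */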
8299
8300 MonoMethod*
8301 mono_arch_find_imt_method (mgreg_t *regs, guint8 *code)
8302 {
8303         return (MonoMethod*)regs [MONO_ARCH_IMT_REG];
8304 }
8305
8306 MonoVTable*
8307 mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code)
8308 {
8309         return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
8310 }
8311
8312 GSList*
8313 mono_arch_get_cie_program (void)
8314 {
8315         GSList *l = NULL;
8316
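        /* On entry to a method the CFA is rsp + 8 (rsp as it was before the
         * caller's call pushed the return address), and the return address
         * itself is stored at cfa - 8; that is what the two ops below encode. */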
8317         mono_add_unwind_op_def_cfa (l, (guint8*)NULL, (guint8*)NULL, AMD64_RSP, 8);
8318         mono_add_unwind_op_offset (l, (guint8*)NULL, (guint8*)NULL, AMD64_RIP, -8);
8319
8320         return l;
8321 }
8322
8323 #ifndef DISABLE_JIT
8324
8325 MonoInst*
8326 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
8327 {
8328         MonoInst *ins = NULL;
8329         int opcode = 0;
8330
8331         if (cmethod->klass == mono_defaults.math_class) {
8332                 if (strcmp (cmethod->name, "Sin") == 0) {
8333                         opcode = OP_SIN;
8334                 } else if (strcmp (cmethod->name, "Cos") == 0) {
8335                         opcode = OP_COS;
8336                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
8337                         opcode = OP_SQRT;
8338                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
8339                         opcode = OP_ABS;
8340                 }
8341                 
8342                 if (opcode && fsig->param_count == 1) {
8343                         MONO_INST_NEW (cfg, ins, opcode);
8344                         ins->type = STACK_R8;
8345                         ins->dreg = mono_alloc_freg (cfg);
8346                         ins->sreg1 = args [0]->dreg;
8347                         MONO_ADD_INS (cfg->cbb, ins);
8348                 }
8349
8350                 opcode = 0;
8351                 if (cfg->opt & MONO_OPT_CMOV) {
8352                         if (strcmp (cmethod->name, "Min") == 0) {
8353                                 if (fsig->params [0]->type == MONO_TYPE_I4)
8354                                         opcode = OP_IMIN;
8355                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
8356                                         opcode = OP_IMIN_UN;
8357                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
8358                                         opcode = OP_LMIN;
8359                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
8360                                         opcode = OP_LMIN_UN;
8361                         } else if (strcmp (cmethod->name, "Max") == 0) {
8362                                 if (fsig->params [0]->type == MONO_TYPE_I4)
8363                                         opcode = OP_IMAX;
8364                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
8365                                         opcode = OP_IMAX_UN;
8366                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
8367                                         opcode = OP_LMAX;
8368                                 else if (fsig->params [0]->type == MONO_TYPE_U8)
8369                                         opcode = OP_LMAX_UN;
8370                         }
8371                 }
8372                 
8373                 if (opcode && fsig->param_count == 2) {
8374                         MONO_INST_NEW (cfg, ins, opcode);
8375                         ins->type = (fsig->params [0]->type == MONO_TYPE_I4 || fsig->params [0]->type == MONO_TYPE_U4) ? STACK_I4 : STACK_I8;
8376                         ins->dreg = mono_alloc_ireg (cfg);
8377                         ins->sreg1 = args [0]->dreg;
8378                         ins->sreg2 = args [1]->dreg;
8379                         MONO_ADD_INS (cfg->cbb, ins);
8380                 }
8381
8382 #if 0
8383                 /* OP_FREM is not IEEE compatible */
8384                 else if (strcmp (cmethod->name, "IEEERemainder") == 0 && fsig->param_count == 2) {
8385                         MONO_INST_NEW (cfg, ins, OP_FREM);
8386                         ins->inst_i0 = args [0];
8387                         ins->inst_i1 = args [1];
8388                 }
8389 #endif
8390         }
8391
8392         return ins;
8393 }
8394 #endif
8395
8396 gboolean
8397 mono_arch_print_tree (MonoInst *tree, int arity)
8398 {
8399         return 0;
8400 }
8401
8402 mgreg_t
8403 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
8404 {
8405         return ctx->gregs [reg];
8406 }
8407
8408 void
8409 mono_arch_context_set_int_reg (MonoContext *ctx, int reg, mgreg_t val)
8410 {
8411         ctx->gregs [reg] = val;
8412 }
8413
8414 gpointer
8415 mono_arch_install_handler_block_guard (MonoJitInfo *ji, MonoJitExceptionInfo *clause, MonoContext *ctx, gpointer new_value)
8416 {
8417         gpointer *sp, old_value;
8418         char *bp;
8419
8420         /* Load the spvar */
8421         bp = MONO_CONTEXT_GET_BP (ctx);
8422         sp = *(gpointer*)(bp + clause->exvar_offset);
8423
8424         old_value = *sp;
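        /* A value outside [code_start, code_start + code_size] means the saved
         * ip does not belong to this method, so the guard must not redirect it. */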
8425         if ((char*)old_value < (char*)ji->code_start || (char*)old_value > ((char*)ji->code_start + ji->code_size))
8426                 return old_value;
8427
8428         *sp = new_value;
8429
8430         return old_value;
8431 }
8432
8433 /*
8434  * mono_arch_emit_load_aotconst:
8435  *
8436  *   Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
8437  * TARGET from the mscorlib GOT in full-aot code.
8438  * On AMD64, the result is placed into R11.
8439  */
8440 guint8*
8441 mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, int tramp_type, gconstpointer target)
8442 {
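        /* Record a patch site at the current offset; the AOT infrastructure
         * later resolves it so that the RIP-relative load below reads the
         * designated GOT slot into R11. */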
8443         *ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
8444         amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
8445
8446         return code;
8447 }
8448
8449 /*
8450  * mono_arch_get_trampolines:
8451  *
8452  *   Return a list of MonoTrampInfo structures describing arch specific trampolines
8453  * for AOT.
8454  */
8455 GSList *
8456 mono_arch_get_trampolines (gboolean aot)
8457 {
8458         return mono_amd64_get_exception_trampolines (aot);
8459 }
8460
8461 /* Soft Debug support */
8462 #ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
8463
8464 /*
8465  * mono_arch_set_breakpoint:
8466  *
8467  *   Set a breakpoint at the native code corresponding to JI at NATIVE_OFFSET.
8468  * The location should contain code emitted by OP_SEQ_POINT.
8469  */
8470 void
8471 mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
8472 {
8473         guint8 *code = ip;
8474         guint8 *orig_code = code;
8475
8476         if (ji->from_aot) {
8477                 guint32 native_offset = ip - (guint8*)ji->code_start;
8478                 SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
8479
8480                 g_assert (info->bp_addrs [native_offset] == 0);
8481                 info->bp_addrs [native_offset] = mini_get_breakpoint_trampoline ();
8482         } else {
8483                 /*
8484                  * In production we would use int3 (which requires fixing the size
8485                  * in the md file), but that could confuse gdb, so for now we emit
8486                  * a faulting memory read (SIGSEGV) instead.
8487                  */
8488                 g_assert (code [0] == 0x90);
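                /* OP_SEQ_POINT reserved breakpoint_size bytes of nops here; we
                 * overwrite them with a read of bp_trigger_page, so executing
                 * the sequence point faults once the breakpoint is enabled
                 * (see mono_arch_is_breakpoint_event ()). */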
8489                 if (breakpoint_size == 8) {
8490                         amd64_mov_reg_mem (code, AMD64_R11, (guint64)bp_trigger_page, 4);
8491                 } else {
8492                         amd64_mov_reg_imm_size (code, AMD64_R11, (guint64)bp_trigger_page, 8);
8493                         amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, 0, 4);
8494                 }
8495
8496                 g_assert (code - orig_code == breakpoint_size);
8497         }
8498 }
8499
8500 /*
8501  * mono_arch_clear_breakpoint:
8502  *
8503  *   Clear the breakpoint at IP.
8504  */
8505 void
8506 mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
8507 {
8508         guint8 *code = ip;
8509         int i;
8510
8511         if (ji->from_aot) {
8512                 guint32 native_offset = ip - (guint8*)ji->code_start;
8513                 SeqPointInfo *info = mono_arch_get_seq_point_info (mono_domain_get (), ji->code_start);
8514
8515                 info->bp_addrs [native_offset] = NULL;
8516         } else {
8517                 for (i = 0; i < breakpoint_size; ++i)
8518                         x86_nop (code);
8519         }
8520 }
8521
8522 gboolean
8523 mono_arch_is_breakpoint_event (void *info, void *sigctx)
8524 {
8525 #ifdef HOST_WIN32
8526         EXCEPTION_RECORD* einfo = ((EXCEPTION_POINTERS*)info)->ExceptionRecord;
8527         if (einfo->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && (gpointer)einfo->ExceptionInformation [1] == bp_trigger_page)
8528                 return TRUE;
8529         else
8530                 return FALSE;
8531 #else
8532         siginfo_t* sinfo = (siginfo_t*) info;
8533         /* Sometimes the address is off by 4 */
8534         if ((guint8*)sinfo->si_addr >= (guint8*)bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
8535                 return TRUE;
8536         else
8537                 return FALSE;
8538 #endif
8539 }
8540
8541 /*
8542  * mono_arch_skip_breakpoint:
8543  *
8544  *   Modify CTX so the ip is placed after the breakpoint instruction, so when
8545  * we resume, the instruction is not executed again.
8546  */
8547 void
8548 mono_arch_skip_breakpoint (MonoContext *ctx, MonoJitInfo *ji)
8549 {
8550         if (ji->from_aot) {
8551                 /* The breakpoint instruction is a call, so on return the ip already points past it; nothing to skip */
8552         } else {
8553                 MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + breakpoint_fault_size);
8554         }
8555 }
8556         
8557 /*
8558  * mono_arch_start_single_stepping:
8559  *
8560  *   Start single stepping.
8561  */
8562 void
8563 mono_arch_start_single_stepping (void)
8564 {
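        /* With all access revoked, the loads from ss_trigger_page emitted at
         * sequence points fault; mono_arch_is_single_step_event () recognizes
         * those faults as single step events. */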
8565         mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
8566         ss_trampoline = mini_get_single_step_trampoline ();
8567 }
8568         
8569 /*
8570  * mono_arch_stop_single_stepping:
8571  *
8572  *   Stop single stepping.
8573  */
8574 void
8575 mono_arch_stop_single_stepping (void)
8576 {
8577         mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
8578         ss_trampoline = NULL;
8579 }
8580
8581 /*
8582  * mono_arch_is_single_step_event:
8583  *
8584  *   Return whether the machine state in SIGCTX corresponds to a single
8585  * step event.
8586  */
8587 gboolean
8588 mono_arch_is_single_step_event (void *info, void *sigctx)
8589 {
8590 #ifdef HOST_WIN32
8591         EXCEPTION_RECORD* einfo = ((EXCEPTION_POINTERS*)info)->ExceptionRecord;
8592         if (einfo->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && (gpointer)einfo->ExceptionInformation [1] == ss_trigger_page)
8593                 return TRUE;
8594         else
8595                 return FALSE;
8596 #else
8597         siginfo_t* sinfo = (siginfo_t*) info;
8598         /* Sometimes the address is off by 4 */
8599         if ((guint8*)sinfo->si_addr >= (guint8*)ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
8600                 return TRUE;
8601         else
8602                 return FALSE;
8603 #endif
8604 }
8605
8606 /*
8607  * mono_arch_skip_single_step:
8608  *
8609  *   Modify CTX so the ip is placed after the single step trigger instruction,
8610  * so that when we resume, the instruction is not executed again.
8611  */
8612 void
8613 mono_arch_skip_single_step (MonoContext *ctx)
8614 {
8615         MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + single_step_fault_size);
8616 }
8617
8618 /*
8619  * mono_arch_get_seq_point_info:
8620  *
8621  *   Return a pointer to a data structure which is used by the sequence
8622  * point implementation in AOTed code.
8623  */
8624 gpointer
8625 mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
8626 {
8627         SeqPointInfo *info;
8628         MonoJitInfo *ji;
8629
8630         // FIXME: Add a free function
8631
8632         mono_domain_lock (domain);
8633         info = g_hash_table_lookup (domain_jit_info (domain)->arch_seq_points,
8634                                                                 code);
8635         mono_domain_unlock (domain);
8636
8637         if (!info) {
8638                 ji = mono_jit_info_table_find (domain, (char*)code);
8639                 g_assert (ji);
8640
8641                 // FIXME: Optimize the size
8642                 info = g_malloc0 (sizeof (SeqPointInfo) + (ji->code_size * sizeof (gpointer)));
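                /* bp_addrs has one gpointer slot per native code byte, indexed
                 * by native offset; a non-NULL entry means a breakpoint is
                 * active at that offset (see mono_arch_set_breakpoint ()). */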
8643
8644                 info->ss_tramp_addr = &ss_trampoline;
8645
8646                 mono_domain_lock (domain);
8647                 g_hash_table_insert (domain_jit_info (domain)->arch_seq_points,
8648                                                          code, info);
8649                 mono_domain_unlock (domain);
8650         }
8651
8652         return info;
8653 }
8654
8655 void
8656 mono_arch_init_lmf_ext (MonoLMFExt *ext, gpointer prev_lmf)
8657 {
8658         ext->lmf.previous_lmf = prev_lmf;
8659         /* Mark that this is a MonoLMFExt */
8660         ext->lmf.previous_lmf = (gpointer)(((gssize)ext->lmf.previous_lmf) | 2);
8661         ext->lmf.rsp = (gssize)ext;
8662 }
8663
8664 #endif
8665
8666 gboolean
8667 mono_arch_opcode_supported (int opcode)
8668 {
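        /* All of these map to native amd64 instructions: aligned loads and
         * stores are atomic on x86-64, and the RMW ops use lock-prefixed or
         * xchg instructions. */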
8669         switch (opcode) {
8670         case OP_ATOMIC_ADD_I4:
8671         case OP_ATOMIC_ADD_I8:
8672         case OP_ATOMIC_EXCHANGE_I4:
8673         case OP_ATOMIC_EXCHANGE_I8:
8674         case OP_ATOMIC_CAS_I4:
8675         case OP_ATOMIC_CAS_I8:
8676         case OP_ATOMIC_LOAD_I1:
8677         case OP_ATOMIC_LOAD_I2:
8678         case OP_ATOMIC_LOAD_I4:
8679         case OP_ATOMIC_LOAD_I8:
8680         case OP_ATOMIC_LOAD_U1:
8681         case OP_ATOMIC_LOAD_U2:
8682         case OP_ATOMIC_LOAD_U4:
8683         case OP_ATOMIC_LOAD_U8:
8684         case OP_ATOMIC_LOAD_R4:
8685         case OP_ATOMIC_LOAD_R8:
8686         case OP_ATOMIC_STORE_I1:
8687         case OP_ATOMIC_STORE_I2:
8688         case OP_ATOMIC_STORE_I4:
8689         case OP_ATOMIC_STORE_I8:
8690         case OP_ATOMIC_STORE_U1:
8691         case OP_ATOMIC_STORE_U2:
8692         case OP_ATOMIC_STORE_U4:
8693         case OP_ATOMIC_STORE_U8:
8694         case OP_ATOMIC_STORE_R4:
8695         case OP_ATOMIC_STORE_R8:
8696                 return TRUE;
8697         default:
8698                 return FALSE;
8699         }
8700 }
8701
8702 #if defined(ENABLE_GSHAREDVT)
8703
8704 #include "../../../mono-extensions/mono/mini/mini-amd64-gsharedvt.c"
8705
8706 #endif /* ENABLE_GSHAREDVT */