2008-11-13 Rodrigo Kumpera <rkumpera@novell.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/metadata/mono-debug.h>
23 #include <mono/utils/mono-math.h>
24 #include <mono/utils/mono-counters.h>
25
26 #include "trace.h"
27 #include "mini-x86.h"
28 #include "inssel.h"
29 #include "cpu-x86.h"
30
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

/* When built with MONO_XEN_OPT this is a runtime flag (default TRUE);
 * otherwise it is a compile-time constant 0 so the related code paths
 * can be eliminated entirely. */
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of 2). */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset of the first argument relative to %ebp (saved ebp + return address). */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Table of breakpoint trampoline info shared with the debugger machinery. */
MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
67
68 const char*
69 mono_arch_regname (int reg)
70 {
71         switch (reg) {
72         case X86_EAX: return "%eax";
73         case X86_EBX: return "%ebx";
74         case X86_ECX: return "%ecx";
75         case X86_EDX: return "%edx";
76         case X86_ESP: return "%esp";    
77         case X86_EBP: return "%ebp";
78         case X86_EDI: return "%edi";
79         case X86_ESI: return "%esi";
80         }
81         return "unknown";
82 }
83
/*
 * mono_arch_fregname:
 *
 *   Return the name of floating point register REG ("%fr0".."%fr7"),
 * or "unknown" for any other value.
 */
const char*
mono_arch_fregname (int reg)
{
	static const char *const fr_names [] = {
		"%fr0", "%fr1", "%fr2", "%fr3", "%fr4", "%fr5", "%fr6", "%fr7"
	};

	if (reg >= 0 && reg < (int)(sizeof (fr_names) / sizeof (fr_names [0])))
		return fr_names [reg];
	return "unknown";
}
108
/*
 * mono_arch_xregname:
 *
 *   Return the name of SSE register REG ("%xmm0".."%xmm7"), or
 * "unknown" for any other value.
 */
const char *
mono_arch_xregname (int reg)
{
	static const char *const xmm_names [] = {
		"%xmm0", "%xmm1", "%xmm2", "%xmm3",
		"%xmm4", "%xmm5", "%xmm6", "%xmm7"
	};

	if (reg >= 0 && reg < (int)(sizeof (xmm_names) / sizeof (xmm_names [0])))
		return xmm_names [reg];
	return "unknown";
}
133
134
/* Where a single argument or return value lives according to the
 * calling convention. */
typedef enum {
	ArgInIReg,             /* in an integer register */
	ArgInFloatSSEReg,      /* in an SSE register, as float */
	ArgInDoubleSSEReg,     /* in an SSE register, as double */
	ArgOnStack,            /* at a stack offset */
	ArgValuetypeInReg,     /* small struct split into pair_storage/pair_regs */
	ArgOnFloatFpStack,     /* on the x87 fp stack, as float */
	ArgOnDoubleFpStack,    /* on the x87 fp stack, as double */
	ArgNone                /* no value (void return) */
} ArgStorage;

/* Calling convention info for one argument or the return value. */
typedef struct {
	gint16 offset;         /* stack offset, valid when storage == ArgOnStack */
	gint8  reg;            /* register number, valid for register storage */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

/* Complete calling convention info for a signature, computed by
 * get_call_info (). */
typedef struct {
	int nargs;
	guint32 stack_usage;         /* total stack space used by the arguments */
	guint32 reg_usage;           /* integer registers consumed */
	guint32 freg_usage;          /* float registers consumed */
	gboolean need_stack_align;   /* extra padding needed for frame alignment */
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;          /* vararg signature cookie location */
	ArgInfo args [1];            /* variable length, nargs entries */
} CallInfo;

/* On x86 no parameters are passed in integer or float registers. */
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

/* On these platforms small structs are returned in eax:edx rather than
 * through a hidden pointer argument. */
#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
178
179 static void inline
180 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
181 {
182     ainfo->offset = *stack_size;
183
184     if (*gr >= PARAM_REGS) {
185                 ainfo->storage = ArgOnStack;
186                 (*stack_size) += sizeof (gpointer);
187     }
188     else {
189                 ainfo->storage = ArgInIReg;
190                 ainfo->reg = param_regs [*gr];
191                 (*gr) ++;
192     }
193 }
194
195 static void inline
196 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
197 {
198         ainfo->offset = *stack_size;
199
200         g_assert (PARAM_REGS == 0);
201         
202         ainfo->storage = ArgOnStack;
203         (*stack_size) += sizeof (gpointer) * 2;
204 }
205
206 static void inline
207 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
208 {
209     ainfo->offset = *stack_size;
210
211     if (*gr >= FLOAT_PARAM_REGS) {
212                 ainfo->storage = ArgOnStack;
213                 (*stack_size) += is_double ? 8 : 4;
214     }
215     else {
216                 /* A double register */
217                 if (is_double)
218                         ainfo->storage = ArgInDoubleSSEReg;
219                 else
220                         ainfo->storage = ArgInFloatSSEReg;
221                 ainfo->reg = *gr;
222                 (*gr) += 1;
223     }
224 }
225
226
/*
 * add_valuetype:
 *
 *   Compute the calling convention info for a valuetype argument or
 * return value of type TYPE and store the result into AINFO.
 * GR/FR/STACK_SIZE track the registers and stack space consumed so far.
 * IS_RETURN distinguishes return values, which may be passed back in
 * registers on some platforms.
 */
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	/* Only pinvoke return values use register returns for structs. */
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of size 1, 2, 4 or 8 come back in eax (and edx for 8). */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Everything else is passed by value on the stack, rounded up to a
	 * multiple of the pointer size. */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
279
280 /*
281  * get_call_info:
282  *
283  *  Obtain information about a call according to the calling convention.
284  * For x86 ELF, see the "System V Application Binary Interface Intel386 
285  * Architecture Processor Supplment, Fourth Edition" document for more
286  * information.
287  * For x86 win32, see ???.
288  */
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	/* NOTE(review): the is_pinvoke parameter is not read in this body
	 * (sig->pinvoke is used instead) — confirm against callers. */
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* CallInfo already contains one ArgInfo, so this allocates one
	 * spare entry; allocate from the mempool when one is supplied,
	 * otherwise from the heap (caller must g_free). */
	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* 64 bit results come back in eax:edx; only eax is recorded. */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			/* Generic reference types return like any object. */
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* Vararg call with no explicit params: emit the cookie now. */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We allways pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* Byref arguments are plain pointers regardless of target type. */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			/* NOTE(review): unlike other stack arguments, ainfo->offset is
			 * not assigned here — confirm no caller reads it for this case. */
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* Vararg call whose sentinel sits after the last param: the cookie
	 * goes at the very end of the argument area. */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	/* Pad the argument area so the frame stays aligned. */
	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
478
479 /*
480  * mono_arch_get_argument_info:
481  * @csig:  a method signature
482  * @param_count: the number of parameters to consider
483  * @arg_info: an array to store the result infos
484  *
485  * Gathers information on parameters such as size, alignment and
486  * padding. arg_info should be large enought to hold param_count + 1 entries. 
487  *
488  * Returns the size of the argument area on the stack.
489  */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;         /* skip saved ebp + return address */
	CallInfo *cinfo;

	cinfo = get_call_info (NULL, NULL, csig, FALSE);

	/* A struct returned on the stack adds a hidden pointer argument. */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	/* Slot 0 describes the implicit arguments (hidden ret ptr + this). */
	arg_info [0].offset = offset;

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;

		/* With align == 1 this pad is always 0; kept for the day
		 * per-argument alignment is honoured. */
		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* Trailing pad to reach the frame alignment (stdcall callees pop
	 * their own arguments, so only 4 byte alignment applies there). */
	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return args_size;
}
542
/*
 * Pre-assembled machine code for a helper with the CpuidFunc signature:
 * it executes the CPUID instruction with the requested id and stores
 * eax/ebx/ecx/edx through the four output pointers.  Kept as data so it
 * can be copied into executable memory at runtime (see cpuid () below).
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};
563
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

/*
 * cpuid:
 *
 *   Execute the CPUID instruction with the given ID, storing the four
 * result registers through the output pointers.  Returns 1 on success,
 * 0 if the cpu does not support CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
	/* CPUID support is detected by trying to toggle the ID flag
	 * (bit 21, 0x200000) in EFLAGS: if the bit sticks, CPUID exists. */
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			/* Copy cpuid_impl into executable memory once; the
			 * cached pointer is reused on later calls. */
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
624
625 /*
626  * Initialize the cpu to execute managed code.
627  */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Read the x87 control word, switch the precision control field to
	 * double precision and write it back; the final fnstcw just
	 * re-reads the updated value. */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* MSVC: set 53 bit (double) precision through the CRT instead. */
	_control87 (_PC_53, MCW_PC);
#endif
}
644
645 /*
646  * Initialize architecture specific code.
647  */
void
mono_arch_init (void)
{
	/* Set up the lock used by mono_mini_arch_lock ()/unlock (). */
	InitializeCriticalSection (&mini_arch_mutex);
}
653
654 /*
655  * Cleanup architecture specific code.
656  */
void
mono_arch_cleanup (void)
{
	/* Tear down the lock created by mono_arch_init (). */
	DeleteCriticalSection (&mini_arch_mutex);
}
662
663 /*
664  * This function returns the optimizations supported on this cpu.
665  */
666 guint32
667 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
668 {
669         int eax, ebx, ecx, edx;
670         guint32 opts = 0;
671         
672         *exclude_mask = 0;
673         /* Feature Flags function, flags returned in EDX. */
674         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
675                 if (edx & (1 << 15)) {
676                         opts |= MONO_OPT_CMOV;
677                         if (edx & 1)
678                                 opts |= MONO_OPT_FCMOV;
679                         else
680                                 *exclude_mask |= MONO_OPT_FCMOV;
681                 } else
682                         *exclude_mask |= MONO_OPT_CMOV;
683                 if (edx & (1 << 26))
684                         opts |= MONO_OPT_SSE2;
685                 else
686                         *exclude_mask |= MONO_OPT_SSE2;
687
688 #ifdef MONO_ARCH_SIMD_INTRINSICS
689                 /*SIMD intrinsics require at least SSE2.*/
690                 if (!(opts & MONO_OPT_SSE2))
691                         *exclude_mask |= MONO_OPT_SIMD;
692 #endif
693         }
694         return opts;
695 }
696
697 /*
698  * This function test for all SSE functions supported.
699  *
700  * Returns a bitmask corresponding to all supported versions.
701  * 
702  * TODO detect other versions like SSE4a.
703  */
704 guint32
705 mono_arch_cpu_enumerate_simd_versions (void)
706 {
707         int eax, ebx, ecx, edx;
708         guint32 sse_opts = 0;
709
710         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
711                 if (edx & (1 << 25))
712                         sse_opts |= 1 << SIMD_VERSION_SSE1;
713                 if (edx & (1 << 26))
714                         sse_opts |= 1 << SIMD_VERSION_SSE2;
715                 if (ecx & (1 << 0))
716                         sse_opts |= 1 << SIMD_VERSION_SSE3;
717                 if (ecx & (1 << 9))
718                         sse_opts |= 1 << SIMD_VERSION_SSSE3;
719                 if (ecx & (1 << 19))
720                         sse_opts |= 1 << SIMD_VERSION_SSE41;
721                 if (ecx & (1 << 20))
722                         sse_opts |= 1 << SIMD_VERSION_SSE42;
723         }
724         return sse_opts;        
725 }
726
727 /*
728  * Determine whenever the trap whose info is in SIGINFO is caused by
729  * integer overflow.
730  */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* Decode the faulting instruction: 0xf7 with modrm reg field 7 and
	 * mod field 3 is "idiv <reg>" (register operand). */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: fetch the runtime VALUE of the divisor register
		 * from the saved context. */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/* A divisor of -1 means the fault was an overflow
		 * (INT_MIN / -1) rather than a division by zero. */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
775
776 GList *
777 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
778 {
779         GList *vars = NULL;
780         int i;
781
782         for (i = 0; i < cfg->num_varinfo; i++) {
783                 MonoInst *ins = cfg->varinfo [i];
784                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
785
786                 /* unused vars */
787                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
788                         continue;
789
790                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
791                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
792                         continue;
793
794                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
795                  * 8bit quantities in caller saved registers on x86 */
796                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
797                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
798                         g_assert (i == vmv->idx);
799                         vars = g_list_prepend (vars, vmv);
800                 }
801         }
802
803         vars = mono_varlist_sort (cfg, vars, 0);
804
805         return vars;
806 }
807
808 GList *
809 mono_arch_get_global_int_regs (MonoCompile *cfg)
810 {
811         GList *regs = NULL;
812
813         /* we can use 3 registers for global allocation */
814         regs = g_list_prepend (regs, (gpointer)X86_EBX);
815         regs = g_list_prepend (regs, (gpointer)X86_ESI);
816         regs = g_list_prepend (regs, (gpointer)X86_EDI);
817
818         return regs;
819 }
820
821 /*
822  * mono_arch_regalloc_cost:
823  *
824  *  Return the cost, in number of memory references, of the action of 
825  * allocating the variable VMV into a register during global register
826  * allocation.
827  */
828 guint32
829 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
830 {
831         MonoInst *ins = cfg->varinfo [vmv->idx];
832
833         if (cfg->method->save_lmf)
834                 /* The register is already saved */
835                 return (ins->opcode == OP_ARG) ? 1 : 0;
836         else
837                 /* push+pop+possible load if it is an argument */
838                 return (ins->opcode == OP_ARG) ? 3 : 2;
839 }
840  
841 /*
842  * Set var information according to the calling convention. X86 version.
843  * The locals var stuff should most likely be split in another method.
844  */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *inst;
        guint32 locals_stack_size, locals_stack_align;
        int i, offset;
        gint32 *offsets;
        CallInfo *cinfo;

        header = mono_method_get_header (cfg->method);
        sig = mono_method_signature (cfg->method);

        cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

        /* All stack slots below are addressed as negative offsets from EBP */
        cfg->frame_reg = X86_EBP;
        offset = 0;

        /* Reserve space to save LMF and caller saved registers */

        if (cfg->method->save_lmf) {
                offset += sizeof (MonoLMF);
        } else {
                /* one 4 byte save slot per callee-saved register actually used */
                if (cfg->used_int_regs & (1 << X86_EBX)) {
                        offset += 4;
                }

                if (cfg->used_int_regs & (1 << X86_EDI)) {
                        offset += 4;
                }

                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        offset += 4;
                }
        }

        switch (cinfo->ret.storage) {
        case ArgValuetypeInReg:
                /* Allocate a local to hold the result, the epilog will copy it to the correct place */
                offset += 8;
                cfg->ret->opcode = OP_REGOFFSET;
                cfg->ret->inst_basereg = X86_EBP;
                cfg->ret->inst_offset = - offset;
                break;
        default:
                break;
        }

        /* Allocate locals */
        offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
        if (locals_stack_align) {
                /* round the running offset up to the required alignment */
                offset += (locals_stack_align - 1);
                offset &= ~(locals_stack_align - 1);
        }
        /*
         * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
         * have locals larger than 8 bytes we need to make sure that
         * they have the appropriate offset.
         */
        if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
                offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
        for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                if (offsets [i] != -1) {
                        /* a slot was assigned: rewrite the var as frame-relative */
                        MonoInst *inst = cfg->varinfo [i];
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = X86_EBP;
                        inst->inst_offset = - (offset + offsets [i]);
                        //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
                }
        }
        offset += locals_stack_size;


        /*
         * Allocate arguments+return value
         */

        switch (cinfo->ret.storage) {
        case ArgOnStack:
                if (MONO_TYPE_ISSTRUCT (sig->ret)) {
                        /* 
                         * In the new IR, the cfg->vret_addr variable represents the
                         * vtype return value.
                         */
                        cfg->vret_addr->opcode = OP_REGOFFSET;
                        cfg->vret_addr->inst_basereg = cfg->frame_reg;
                        cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
                        if (G_UNLIKELY (cfg->verbose_level > 1)) {
                                printf ("vret_addr =");
                                mono_print_ins (cfg->vret_addr);
                        }
                } else {
                        cfg->ret->opcode = OP_REGOFFSET;
                        cfg->ret->inst_basereg = X86_EBP;
                        cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
                }
                break;
        case ArgValuetypeInReg:
                /* already handled above by allocating a local */
                break;
        case ArgInIReg:
                cfg->ret->opcode = OP_REGVAR;
                cfg->ret->inst_c0 = cinfo->ret.reg;
                cfg->ret->dreg = cinfo->ret.reg;
                break;
        case ArgNone:
        case ArgOnFloatFpStack:
        case ArgOnDoubleFpStack:
                break;
        default:
                g_assert_not_reached ();
        }

        if (sig->call_convention == MONO_CALL_VARARG) {
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                /* remember where the signature cookie lives relative to EBP */
                cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        /* incoming arguments live above EBP (positive offsets, after the
         * saved EBP and the return address, hence ARGS_OFFSET) */
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];
                inst = cfg->args [i];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = X86_EBP;
                }
                inst->inst_offset = ainfo->offset + ARGS_OFFSET;
        }

        /* round the total frame size up to MONO_ARCH_FRAME_ALIGNMENT */
        offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
        offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

        cfg->stack_offset = offset;
}
978
979 void
980 mono_arch_create_vars (MonoCompile *cfg)
981 {
982         MonoMethodSignature *sig;
983         CallInfo *cinfo;
984
985         sig = mono_method_signature (cfg->method);
986
987         cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
988
989         if (cinfo->ret.storage == ArgValuetypeInReg)
990                 cfg->ret_var_is_local = TRUE;
991         if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
992                 cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
993         }
994 }
995
/*
 * emit_sig_cookie:
 *
 *   Prepend to CALL's out_args an OP_OUTARG which pushes the signature
 * cookie needed by vararg calls (tree IR version; see emit_sig_cookie2
 * for the linear IR counterpart).
 */
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
{
        MonoInst *arg;
        MonoMethodSignature *tmp_sig;
        MonoInst *sig_arg;

        /* FIXME: Add support for signature tokens to AOT */
        cfg->disable_aot = TRUE;
        MONO_INST_NEW (cfg, arg, OP_OUTARG);

        /*
         * mono_ArgIterator_Setup assumes the signature cookie is 
         * passed first and all the arguments which were before it are
         * passed on the stack after the signature. So compensate by 
         * passing a different signature.
         */
        tmp_sig = mono_metadata_signature_dup (call->signature);
        tmp_sig->param_count -= call->signature->sentinelpos;
        tmp_sig->sentinelpos = 0;
        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

        /* the cookie itself is the address of the truncated signature */
        MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
        sig_arg->inst_p0 = tmp_sig;

        arg->inst_left = sig_arg;
        arg->type = STACK_PTR;
        /* prepend, so they get reversed */
        arg->next = call->out_args;
        call->out_args = arg;
}
1027
1028 /*
1029  * It is expensive to adjust esp for each individual fp argument pushed on the stack
1030  * so we try to do it just once when we have multiple fp arguments in a row.
1031  * We don't use this mechanism generally because for int arguments the generated code
1032  * is slightly bigger and new generation cpus optimize away the dependency chains
1033  * created by push instructions on the esp value.
1034  * fp_arg_setup is the first argument in the execution sequence where the esp register
1035  * is modified.
1036  */
1037 static int
1038 collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
1039 {
1040         int fp_space = 0;
1041         MonoType *t;
1042
1043         for (; start_arg < sig->param_count; ++start_arg) {
1044                 t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
1045                 if (!t->byref && t->type == MONO_TYPE_R8) {
1046                         fp_space += sizeof (double);
1047                         *fp_arg_setup = start_arg;
1048                 } else {
1049                         break;
1050                 }
1051         }
1052         return fp_space;
1053 }
1054
1055 /* 
1056  * take the arguments and generate the arch-specific
1057  * instructions to properly call the function in call.
1058  * This includes pushing, moving arguments to the right register
1059  * etc.
1060  */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
        int i, n;
        CallInfo *cinfo;
        int sentinelpos = 0;
        int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;

        sig = call->signature;
        n = sig->param_count + sig->hasthis;

        cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);

        for (i = 0; i < n; ++i) {
                ArgInfo *ainfo = cinfo->args + i;

                /* Emit the signature cookie just before the implicit arguments */
                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
                        emit_sig_cookie (cfg, call);
                }

                if (is_virtual && i == 0) {
                        /* the argument will be attached to the call instruction */
                        in = call->args [i];
                } else {
                        MonoType *t;

                        if (i >= sig->hasthis)
                                t = sig->params [i - sig->hasthis];
                        else
                                t = &mono_defaults.int_class->byval_arg;
                        t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        in = call->args [i];
                        arg->cil_code = in->cil_code;
                        arg->inst_left = in;
                        arg->type = in->type;
                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;

                        if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
                                /* valuetype argument: passed by copying it onto the stack */
                                gint align;
                                guint32 ialign;
                                guint32 size;

                                if (t->type == MONO_TYPE_TYPEDBYREF) {
                                        size = sizeof (MonoTypedRef);
                                        align = sizeof (gpointer);
                                }
                                else {
                                        size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &ialign, sig->pinvoke);
                                }
                                arg->opcode = OP_OUTARG_VT;
                                arg->klass = in->klass;
                                arg->backend.is_pinvoke = sig->pinvoke;
                                arg->inst_imm = size; 
                        }
                        else {
                                switch (ainfo->storage) {
                                case ArgOnStack:
                                        arg->opcode = OP_OUTARG;
                                        if (!t->byref) {
                                                if (t->type == MONO_TYPE_R4) {
                                                        arg->opcode = OP_OUTARG_R4;
                                                } else if (t->type == MONO_TYPE_R8) {
                                                        arg->opcode = OP_OUTARG_R8;
                                                        /* we store in the upper bits of backend.arg_info the needed
                                                         * esp adjustment and in the lower bits the offset from esp
                                                         * where the arg needs to be stored
                                                         */
                                                        if (!fp_args_space) {
                                                                /* start of a run of fp args: reserve esp space for
                                                                 * the whole run at once (see collect_fp_stack_space) */
                                                                fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
                                                                fp_args_offset = fp_args_space;
                                                        }
                                                        arg->backend.arg_info = fp_args_space - fp_args_offset;
                                                        fp_args_offset -= sizeof (double);
                                                        if (i - sig->hasthis == fp_arg_setup) {
                                                                /* this is the arg where the esp adjustment happens */
                                                                arg->backend.arg_info |= fp_args_space << 16;
                                                        }
                                                        if (fp_args_offset == 0) {
                                                                /* the allocated esp stack is finished:
                                                                 * prepare for an eventual second run of fp args
                                                                 */
                                                                fp_args_space = 0;
                                                        }
                                                }
                                        }
                                        break;
                                default:
                                        g_assert_not_reached ();
                                }
                        }
                }
        }

        /* Handle the case where there are no implicit arguments */
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
                emit_sig_cookie (cfg, call);
        }

        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        MonoInst *zero_inst;
                        /*
                         * After the call, the struct is in registers, but needs to be saved to the memory pointed
                         * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
                         * before calling the function. So we add a dummy instruction to represent pushing the 
                         * struct return address to the stack. The return address will be saved to this stack slot 
                         * by the code emitted in this_vret_args.
                         */
                        MONO_INST_NEW (cfg, arg, OP_OUTARG);
                        MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
                        zero_inst->inst_p0 = 0;
                        arg->inst_left = zero_inst;
                        arg->type = STACK_PTR;
                        /* prepend, so they get reversed */
                        arg->next = call->out_args;
                        call->out_args = arg;
                }
                else
                        /* if the function returns a struct, the called method already does a ret $0x4 */
                        /* NOTE(review): this inner check is redundant — it is implied by the
                         * enclosing if — but harmless; kept as-is. */
                        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
                                cinfo->stack_usage -= 4;
        }

        call->stack_usage = cinfo->stack_usage;

        if (cinfo->need_stack_align) {
                /* prepend an instruction which re-aligns esp before the pushes */
                MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
                arg->inst_c0 = cinfo->stack_align_amount;
                arg->next = call->out_args;
                call->out_args = arg;
        }

        return call;
}
1203
/*
 * emit_sig_cookie2:
 *
 *   Linear IR counterpart of emit_sig_cookie (): push the signature
 * cookie needed by vararg calls directly via OP_X86_PUSH_IMM.
 */
static void
emit_sig_cookie2 (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
        MonoMethodSignature *tmp_sig;

        /* FIXME: Add support for signature tokens to AOT */
        cfg->disable_aot = TRUE;

        /*
         * mono_ArgIterator_Setup assumes the signature cookie is 
         * passed first and all the arguments which were before it are
         * passed on the stack after the signature. So compensate by 
         * passing a different signature.
         */
        tmp_sig = mono_metadata_signature_dup (call->signature);
        tmp_sig->param_count -= call->signature->sentinelpos;
        tmp_sig->sentinelpos = 0;
        memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

        MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}
1225
/*
 * mono_arch_emit_call:
 *
 *   Emit (linear IR) the instructions which set up the arguments of CALL:
 * stack alignment, the vtype return address, and one push per argument,
 * emitted in reverse order to match the cdecl stack layout.
 */
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
        MonoInst *arg, *in;
        MonoMethodSignature *sig;
        int i, n;
        CallInfo *cinfo;
        int sentinelpos = 0;

        sig = call->signature;
        n = sig->param_count + sig->hasthis;

        cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
                sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

        if (cinfo->need_stack_align) {
                /* re-align esp before the pushes start */
                MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
                arg->dreg = X86_ESP;
                arg->sreg1 = X86_ESP;
                arg->inst_imm = cinfo->stack_align_amount;
                MONO_ADD_INS (cfg->cbb, arg);
        }

        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
                MonoInst *vtarg;

                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
                                /*
                                 * Tell the JIT to use a more efficient calling convention: call using
                                 * OP_CALL, compute the result location after the call, and save the 
                                 * result there.
                                 */
                                call->vret_in_reg = TRUE;
                        } else {
                                /*
                                 * The valuetype is in EAX:EDX after the call, needs to be copied to
                                 * the stack. Save the address here, so the call instruction can
                                 * access it.
                                 */
                                MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
                                vtarg->sreg1 = call->vret_var->dreg;
                                MONO_ADD_INS (cfg->cbb, vtarg);
                        }
                }
        }

        /* Handle the case where there are no implicit arguments */
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
                emit_sig_cookie2 (cfg, call, cinfo);
        }

        /* Arguments are pushed in the reverse order */
        for (i = n - 1; i >= 0; i --) {
                ArgInfo *ainfo = cinfo->args [i];
                MonoType *t;

                if (i >= sig->hasthis)
                        t = sig->params [i - sig->hasthis];
                else
                        t = &mono_defaults.int_class->byval_arg;
                t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

                MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

                in = call->args [i];
                arg->cil_code = in->cil_code;
                arg->sreg1 = in->dreg;
                arg->type = in->type;

                g_assert (in->dreg != -1);

                if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
                        /* valuetype argument: lowered later by mono_arch_emit_outarg_vt () */
                        guint32 align;
                        guint32 size;

                        g_assert (in->klass);

                        if (t->type == MONO_TYPE_TYPEDBYREF) {
                                size = sizeof (MonoTypedRef);
                                align = sizeof (gpointer);
                        }
                        else {
                                size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
                        }

                        if (size > 0) {
                                arg->opcode = OP_OUTARG_VT;
                                arg->sreg1 = in->dreg;
                                arg->klass = in->klass;
                                arg->backend.size = size;

                                MONO_ADD_INS (cfg->cbb, arg);
                        }
                }
                else {
                        switch (ainfo->storage) {
                        case ArgOnStack:
                                arg->opcode = OP_X86_PUSH;
                                if (!t->byref) {
                                        if (t->type == MONO_TYPE_R4) {
                                                /* store the float into freshly reserved esp space
                                                 * instead of pushing it */
                                                MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
                                                arg->opcode = OP_STORER4_MEMBASE_REG;
                                                arg->inst_destbasereg = X86_ESP;
                                                arg->inst_offset = 0;
                                        } else if (t->type == MONO_TYPE_R8) {
                                                MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
                                                arg->opcode = OP_STORER8_MEMBASE_REG;
                                                arg->inst_destbasereg = X86_ESP;
                                                arg->inst_offset = 0;
                                        } else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
                                                /* push the high word (dreg + 2) first, then let arg
                                                 * push the low word (dreg + 1) */
                                                arg->sreg1 ++;
                                                MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
                                        }
                                }
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                        
                        MONO_ADD_INS (cfg->cbb, arg);
                }

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
                        /* Emit the signature cookie just before the implicit arguments */
                        emit_sig_cookie2 (cfg, call, cinfo);
                }
        }

        if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
                MonoInst *vtarg;

                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        /* Already done */
                }
                else if (cinfo->ret.storage == ArgInIReg) {
                        NOT_IMPLEMENTED;
                        /* The return address is passed in a register */
                        MONO_INST_NEW (cfg, vtarg, OP_MOVE);
                        vtarg->sreg1 = call->inst.dreg;
                        vtarg->dreg = mono_regstate_next_int (cfg->rs);
                        MONO_ADD_INS (cfg->cbb, vtarg);
                                
                        mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
                } else {
                        /* push the address of the return buffer as a hidden argument */
                        MonoInst *vtarg;
                        MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
                        vtarg->type = STACK_MP;
                        vtarg->sreg1 = call->vret_var->dreg;
                        MONO_ADD_INS (cfg->cbb, vtarg);
                }

                /* if the function returns a struct, the called method already does a ret $0x4 */
                cinfo->stack_usage -= 4;
        }

        call->stack_usage = cinfo->stack_usage;
}
1386
1387 void
1388 mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
1389 {
1390         MonoInst *arg;
1391         int size = ins->backend.size;
1392
1393         if (size <= 4) {
1394                 MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
1395                 arg->sreg1 = src->dreg;
1396
1397                 MONO_ADD_INS (cfg->cbb, arg);
1398         } else if (size <= 20) {        
1399                 MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
1400                 mini_emit_memcpy2 (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
1401         } else {
1402                 MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
1403                 arg->inst_basereg = src->dreg;
1404                 arg->inst_offset = 0;
1405                 arg->inst_imm = size;
1406                                         
1407                 MONO_ADD_INS (cfg->cbb, arg);
1408         }
1409 }
1410
1411 void
1412 mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
1413 {
1414         MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);
1415
1416         if (!ret->byref) {
1417                 if (ret->type == MONO_TYPE_R4) {
1418                         /* Nothing to do */
1419                         return;
1420                 } else if (ret->type == MONO_TYPE_R8) {
1421                         /* Nothing to do */
1422                         return;
1423                 } else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
1424                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
1425                         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
1426                         return;
1427                 }
1428         }
1429                         
1430         MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
1431 }
1432
1433 /*
1434  * Allow tracing to work with this interface (with an optional argument)
1435  */
1436 void*
1437 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1438 {
1439         guchar *code = p;
1440
1441         g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
1442         x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);
1443
1444         /* if some args are passed in registers, we need to save them here */
1445         x86_push_reg (code, X86_EBP);
1446
1447         if (cfg->compile_aot) {
1448                 x86_push_imm (code, cfg->method);
1449                 x86_mov_reg_imm (code, X86_EAX, func);
1450                 x86_call_reg (code, X86_EAX);
1451         } else {
1452                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1453                 x86_push_imm (code, cfg->method);
1454                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1455                 x86_call_code (code, 0);
1456         }
1457         x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);
1458
1459         return code;
1460 }
1461
/* Save modes used by mono_arch_instrument_epilog () to preserve the
 * return value around the call to the trace function. */
enum {
        SAVE_NONE,     /* nothing to preserve */
        SAVE_STRUCT,   /* vtype returned through a hidden pointer argument */
        SAVE_EAX,      /* 32 bit integer/pointer result in EAX */
        SAVE_EAX_EDX,  /* 64 bit result in the EAX:EDX pair */
        SAVE_FP        /* floating point result on the x87 stack */
};
1469
1470 void*
1471 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1472 {
1473         guchar *code = p;
1474         int arg_size = 0, save_mode = SAVE_NONE;
1475         MonoMethod *method = cfg->method;
1476         
1477         switch (mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret)->type) {
1478         case MONO_TYPE_VOID:
1479                 /* special case string .ctor icall */
1480                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1481                         save_mode = SAVE_EAX;
1482                 else
1483                         save_mode = SAVE_NONE;
1484                 break;
1485         case MONO_TYPE_I8:
1486         case MONO_TYPE_U8:
1487                 save_mode = SAVE_EAX_EDX;
1488                 break;
1489         case MONO_TYPE_R4:
1490         case MONO_TYPE_R8:
1491                 save_mode = SAVE_FP;
1492                 break;
1493         case MONO_TYPE_GENERICINST:
1494                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1495                         save_mode = SAVE_EAX;
1496                         break;
1497                 }
1498                 /* Fall through */
1499         case MONO_TYPE_VALUETYPE:
1500                 save_mode = SAVE_STRUCT;
1501                 break;
1502         default:
1503                 save_mode = SAVE_EAX;
1504                 break;
1505         }
1506
1507         switch (save_mode) {
1508         case SAVE_EAX_EDX:
1509                 x86_push_reg (code, X86_EDX);
1510                 x86_push_reg (code, X86_EAX);
1511                 if (enable_arguments) {
1512                         x86_push_reg (code, X86_EDX);
1513                         x86_push_reg (code, X86_EAX);
1514                         arg_size = 8;
1515                 }
1516                 break;
1517         case SAVE_EAX:
1518                 x86_push_reg (code, X86_EAX);
1519                 if (enable_arguments) {
1520                         x86_push_reg (code, X86_EAX);
1521                         arg_size = 4;
1522                 }
1523                 break;
1524         case SAVE_FP:
1525                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1526                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1527                 if (enable_arguments) {
1528                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1529                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1530                         arg_size = 8;
1531                 }
1532                 break;
1533         case SAVE_STRUCT:
1534                 if (enable_arguments) {
1535                         x86_push_membase (code, X86_EBP, 8);
1536                         arg_size = 4;
1537                 }
1538                 break;
1539         case SAVE_NONE:
1540         default:
1541                 break;
1542         }
1543
1544         if (cfg->compile_aot) {
1545                 x86_push_imm (code, method);
1546                 x86_mov_reg_imm (code, X86_EAX, func);
1547                 x86_call_reg (code, X86_EAX);
1548         } else {
1549                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1550                 x86_push_imm (code, method);
1551                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1552                 x86_call_code (code, 0);
1553         }
1554         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1555
1556         switch (save_mode) {
1557         case SAVE_EAX_EDX:
1558                 x86_pop_reg (code, X86_EAX);
1559                 x86_pop_reg (code, X86_EDX);
1560                 break;
1561         case SAVE_EAX:
1562                 x86_pop_reg (code, X86_EAX);
1563                 break;
1564         case SAVE_FP:
1565                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1566                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1567                 break;
1568         case SAVE_NONE:
1569         default:
1570                 break;
1571         }
1572
1573         return code;
1574 }
1575
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch to the target of INS. If the target's native
 * offset is already known, branch to it directly; otherwise record a patch
 * entry and emit a branch with a 0 displacement to be fixed up later. With
 * MONO_OPT_BRANCH, the 8 bit displacement form is used when the estimated
 * distance (relative to CPOS, taken from the expansion context) fits,
 * saving code size.
 *   The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement and is safe in unbraced if/else constructs; previously
 * it was a bare if/else, which would bind incorrectly in
 * "if (c) EMIT_COND_BRANCH (...); else ...".
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
do { \
	if (ins->flags & MONO_INST_BRLABEL) { \
		if (ins->inst_i0->inst_c0) { \
			x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
			if ((cfg->opt & MONO_OPT_BRANCH) && \
			    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
				x86_branch8 (code, cond, 0, sign); \
			else \
				x86_branch32 (code, cond, 0, sign); \
		} \
	} else { \
		if (ins->inst_true_bb->native_offset) { \
			x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
			if ((cfg->opt & MONO_OPT_BRANCH) && \
			    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
				x86_branch8 (code, cond, 0, sign); \
			else \
				x86_branch32 (code, cond, 0, sign); \
		} \
	} \
} while (0)
1600
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit code which throws the exception EXC_NAME when the condition COND
 * holds. When the exception target can be resolved to a block inside the
 * method (by mono_branch_optimize_exception_target), branch to it directly
 * instead of going through the generic patched throw sequence.
 *   Note: the trailing semicolon after "while (0)" was removed — it produced
 * an extra empty statement at every call site, which breaks unbraced
 * if/else usage.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
	do {                                                        \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) {                                                                             \
			mono_add_patch_info (cfg, code - cfg->native_code,   \
					MONO_PATCH_INFO_EXC, exc_name);  \
			x86_branch32 (code, cond, 0, signed);               \
		} else {	\
			EMIT_COND_BRANCH (tins, cond, signed);	\
		}			\
	} while (0)
1616
/*
 * EMIT_FPCOMPARE:
 *
 *   Compare and pop the two values on top of the x87 fp stack (fcompp),
 * then store the FPU status word into AX (fnstsw) so the flags can be
 * inspected with ordinary integer tests.
 *   The stray semicolon after "while (0)" was removed: it expanded into an
 * extra empty statement at call sites, defeating the purpose of the
 * do/while (0) idiom in unbraced if/else bodies.
 */
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0)
1621
1622
/*
 * emit_call:
 *
 *   Emit a call whose target is resolved later by the patching machinery:
 * a patch entry of PATCH_TYPE/DATA is recorded at the current native offset
 * and a call instruction with a 0 displacement is emitted as a placeholder.
 * Returns the updated code pointer.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	/* The patch must be recorded *before* emitting the call, since the
	 * offset has to point at the start of the call instruction. */
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
1631
/* TRUE if OPCODE does not read the x86 carry flag, i.e. it is not one of the
 * add-with-carry / subtract-with-borrow variants. Used to decide whether a
 * flag-clobbering instruction (like XOR) may be placed right before it. */
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
1633
/*
 * mono_arch_peephole_pass_1:
 *
 *   Perform peephole opts which should/can be performed before local regalloc.
 * Walks every instruction of BB (safe against in-place rewriting) and applies
 * x86 specific strength reductions; generic peephole opts are delegated to
 * mono_peephole_ins () at the end of each iteration.
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			/* NOTE(review): the MONO_MAX_IREGS comparison appears to
			 * distinguish hard registers (below) from virtual ones
			 * (above) — confirm against the regalloc's vreg numbering. */
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/* 
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				/* add reg, 1 -> inc reg (shorter encoding) */
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/* Same LEA trick as above, with a negated immediate */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				/* sub reg, 1 -> dec reg */
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;			
		case OP_X86_PUSH_MEMBASE:
			/* A push of a memory slot which was just stored to can push
			 * the source register directly, avoiding the memory load. */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
1714
1715 void
1716 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
1717 {
1718         MonoInst *ins, *n;
1719
1720         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
1721                 switch (ins->opcode) {
1722                 case OP_ICONST:
1723                         /* reg = 0 -> XOR (reg, reg) */
1724                         /* XOR sets cflags on x86, so we cant do it always */
1725                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1726                                 MonoInst *ins2;
1727
1728                                 ins->opcode = OP_IXOR;
1729                                 ins->sreg1 = ins->dreg;
1730                                 ins->sreg2 = ins->dreg;
1731
1732                                 /* 
1733                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1734                                  * since it takes 3 bytes instead of 7.
1735                                  */
1736                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1737                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1738                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1739                                                 ins2->sreg1 = ins->dreg;
1740                                         }
1741                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1742                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1743                                                 ins2->sreg1 = ins->dreg;
1744                                         }
1745                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1746                                                 /* Continue iteration */
1747                                         }
1748                                         else
1749                                                 break;
1750                                 }
1751                         }
1752                         break;
1753                 case OP_IADD_IMM:
1754                 case OP_ADD_IMM:
1755                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1756                                 ins->opcode = OP_X86_INC_REG;
1757                         break;
1758                 case OP_ISUB_IMM:
1759                 case OP_SUB_IMM:
1760                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1761                                 ins->opcode = OP_X86_DEC_REG;
1762                         break;
1763                 }
1764
1765                 mono_peephole_ins (bb, ins);
1766         }
1767 }
1768
1769 /*
1770  * mono_arch_lowering_pass:
1771  *
1772  *  Converts complex opcodes into simpler ones so that each IR instruction
1773  * corresponds to one machine instruction.
1774  */
1775 void
1776 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1777 {
1778         MonoInst *ins, *next;
1779
1780         if (bb->max_vreg > cfg->rs->next_vreg)
1781                 cfg->rs->next_vreg = bb->max_vreg;
1782
1783         /*
1784          * FIXME: Need to add more instructions, but the current machine 
1785          * description can't model some parts of the composite instructions like
1786          * cdq.
1787          */
1788         MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
1789                 switch (ins->opcode) {
1790                 case OP_IREM_IMM:
1791                 case OP_IDIV_IMM:
1792                 case OP_IDIV_UN_IMM:
1793                 case OP_IREM_UN_IMM:
1794                         /* 
1795                          * Keep the cases where we could generated optimized code, otherwise convert
1796                          * to the non-imm variant.
1797                          */
1798                         if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
1799                                 break;
1800                         mono_decompose_op_imm (cfg, bb, ins);
1801                         break;
1802                 default:
1803                         break;
1804                 }
1805         }
1806
1807         bb->max_vreg = cfg->rs->next_vreg;
1808 }
1809
/*
 * Maps conditional branch opcodes (indexed by their offset from the first
 * branch opcode) to x86 condition codes. The trailing O/NO/C/NC entries
 * serve the overflow/carry checks.
 * NOTE(review): the row layout (signed vs. unsigned variants of GE/GT/LE/LT)
 * is inferred from the duplicated pattern — confirm against the opcode
 * ordering this table is indexed with.
 */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

/* For each entry of cc_table: TRUE when the comparison is signed, FALSE when
 * the unsigned condition-code variant must be selected at emission time. */
static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
1829
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 fp stack into an
 * integer of SIZE bytes placed in DREG, truncating toward zero. The result
 * is sign- or zero-extended for sizes 1 and 2 according to IS_SIGNED.
 * For SIZE == 8 only the low 32 bits end up in DREG (see the FIXME below).
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	/* This SSE2 optimization must not be done with OPT_SIMD in place, as it clobbers xmm0. */
	/* The xmm pass decomposes OP_FCONV_ ops anyway. */
	if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		/* Spill the x87 value to the stack, reload it into xmm0 and use
		 * cvttsd2si, which truncates without touching the control word. */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	/* x87 path: save the FPU control word, set the rounding-control bits
	 * (0xc00) to "round toward zero", convert with fistp, then restore. */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the original control word saved at [esp+0] */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1877
/*
 * mono_emit_stack_alloc:
 *
 *   Emit code implementing localloc: subtract SREG1 bytes from ESP. On
 * platforms which grow the stack via a guard page (Windows, or when
 * MONO_ARCH_SIGSEGV_ON_ALTSTACK is defined) the stack is "touched" one page
 * (0x1000 bytes) at a time so each new page is committed before use. When
 * MONO_INST_INIT is set on TREE, the allocated area is zeroed with rep stosl.
 *   Note: SREG1 is clobbered by the initialization code (shifted/decremented).
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		/* Fast path: sizes below one page need no touching */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/* 
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);				
			/* EDI = start of the just-allocated page (skip the 3 saved regs) */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		/* Loop while at least one full page remains to be allocated */
		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		/* Allocate the sub-page remainder, if any */
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		/* Small-size fast path lands here: plain subtraction */
		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		/* Save EAX/ECX/EDI only when they are neither the size source nor
		 * the result register; OFFSET tracks how far the saved registers
		 * push the allocated area down the stack. */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* ECX = size in dwords, EAX = 0, EDI = area start; then rep stosl */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
				
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
1984
1985
/*
 * emit_move_return_value:
 *
 *   Emit code moving the return value of the call instruction INS from its
 * ABI location into the destination expected by the rest of the compiled
 * code. Returns the updated code pointer.
 */
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		/* Scalar results arrive in EAX */
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
	case OP_VCALL2:
	case OP_VCALL2_REG:
	case OP_VCALL2_MEMBASE:
		/* Valuetype results: may come back in registers instead of the
		 * usual hidden-pointer convention. */
		cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Pop the destination address from the stack */
			x86_pop_reg (code, X86_ECX);
			
			/* Store each returned register half into the destination */
			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					/* ECX holds the destination address, so it must
					 * not also carry part of the return value */
					g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
					x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
					break;
				case ArgNone:
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
		break;
	case OP_FCALL: {
		MonoCallInst *call = (MonoCallInst*)ins;
		if (call->method && !mono_method_signature (call->method)->ret->byref && mono_method_signature (call->method)->ret->type == MONO_TYPE_R4) {
			/* Avoid some precision issues by saving/reloading the return value */
			/* (round-trip the x87 value through a 4 byte memory slot to
			 * force single precision) */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
			x86_fld_membase (code, X86_ESP, 0, FALSE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		}
		break;
	}
	default:
		break;
	}

	return code;
}
2042
/*
 * mono_x86_emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * mono_x86_emit_tls_get emits in @code the native code that puts in
 * the dreg register the item in the thread local storage identified
 * by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
guint8*
mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the first 64 TLS slots live directly in the TIB; larger indices
	 * would need the expansion-slot indirection, which isn't emitted here. */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	/* fs:[0x18] is the TIB self pointer (NOTE(review): per the documented
	 * Win32 TEB layout — confirm for the targeted Windows versions) */
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 (0xE10) is the offset of the TlsSlots array inside the TIB */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		/* Under Xen the gs-relative access must go through an extra
		 * indirection: load the TLS base from gs:[0] first. */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
2081
/*
 * emit_load_volatile_arguments:
 *
 *  Restore the method's arguments to their original locations before a tail
 * call. On x86 arguments arrive on the stack, so for arguments promoted to
 * global registers (OP_REGVAR) this *stores* the register value back into
 * the original stack slot; arguments still in their stack slots need no
 * work. (The function name/comment mirrors the amd64 version, where the
 * direction is reversed.)
 */
static guint8*
emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig;
	MonoInst *inst;
	CallInfo *cinfo;
	guint32 i;

	/* FIXME: Generate intermediate code instead */

	sig = mono_method_signature (method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
	
	/* This is the opposite of the code in emit_prolog */

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *arg_type;
		inst = cfg->args [i];

		/* The implicit "this" argument has object type */
		if (sig->hasthis && (i == 0))
			arg_type = &mono_defaults.object_class->byval_arg;
		else
			arg_type = sig->params [i - sig->hasthis];

		/*
		 * On x86, the arguments are either in their original stack locations, or in
		 * global regs.
		 */
		if (inst->opcode == OP_REGVAR) {
			g_assert (ainfo->storage == ArgOnStack);
			
			/* Write the global reg back to the incoming stack slot */
			x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
		}
	}

	return code;
}
2128
/*
 * REAL_PRINT_REG:
 *
 *   Debugging helper: emits code which calls printf with TEXT, the number of
 * hard register REG (%d) and its runtime value (%p), preserving the
 * caller-saved registers EAX/EDX/ECX around the call. The address of printf
 * is embedded directly, so this only works in non-AOT code.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2143
/* Byte alignment applied to loop-start basic blocks. benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A bblock is treated as a loop start when it begins a loop body and is
 * nested inside at least one loop */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2147
2148 void
2149 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2150 {
2151         MonoInst *ins;
2152         MonoCallInst *call;
2153         guint offset;
2154         guint8 *code = cfg->native_code + cfg->code_len;
2155         int max_len, cpos;
2156
2157         if (cfg->opt & MONO_OPT_LOOP) {
2158                 int pad, align = LOOP_ALIGNMENT;
2159                 /* set alignment depending on cpu */
2160                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2161                         pad = align - pad;
2162                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2163                         x86_padding (code, pad);
2164                         cfg->code_len += pad;
2165                         bb->native_offset = cfg->code_len;
2166                 }
2167         }
2168
2169         if (cfg->verbose_level > 2)
2170                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2171
2172         cpos = bb->max_offset;
2173
2174         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2175                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2176                 g_assert (!cfg->compile_aot);
2177                 cpos += 6;
2178
2179                 cov->data [bb->dfn].cil_code = bb->cil_code;
2180                 /* this is not thread save, but good enough */
2181                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2182         }
2183
2184         offset = code - cfg->native_code;
2185
2186         mono_debug_open_block (cfg, bb, offset);
2187
2188         MONO_BB_FOR_EACH_INS (bb, ins) {
2189                 offset = code - cfg->native_code;
2190
2191                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2192
2193                 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2194                         cfg->code_size *= 2;
2195                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2196                         code = cfg->native_code + offset;
2197                         mono_jit_stats.code_reallocs++;
2198                 }
2199
2200                 if (cfg->debug_info)
2201                         mono_debug_record_line_number (cfg, ins, offset);
2202
2203                 switch (ins->opcode) {
2204                 case OP_BIGMUL:
2205                         x86_mul_reg (code, ins->sreg2, TRUE);
2206                         break;
2207                 case OP_BIGMUL_UN:
2208                         x86_mul_reg (code, ins->sreg2, FALSE);
2209                         break;
2210                 case OP_X86_SETEQ_MEMBASE:
2211                 case OP_X86_SETNE_MEMBASE:
2212                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2213                                          ins->inst_basereg, ins->inst_offset, TRUE);
2214                         break;
2215                 case OP_STOREI1_MEMBASE_IMM:
2216                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2217                         break;
2218                 case OP_STOREI2_MEMBASE_IMM:
2219                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2220                         break;
2221                 case OP_STORE_MEMBASE_IMM:
2222                 case OP_STOREI4_MEMBASE_IMM:
2223                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2224                         break;
2225                 case OP_STOREI1_MEMBASE_REG:
2226                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2227                         break;
2228                 case OP_STOREI2_MEMBASE_REG:
2229                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2230                         break;
2231                 case OP_STORE_MEMBASE_REG:
2232                 case OP_STOREI4_MEMBASE_REG:
2233                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2234                         break;
2235                 case OP_STORE_MEM_IMM:
2236                         x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2237                         break;
2238                 case OP_LOADU4_MEM:
2239                         if (cfg->new_ir)
2240                                 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2241                         else
2242                                 x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2243                         break;
2244                 case OP_LOAD_MEM:
2245                 case OP_LOADI4_MEM:
2246                         /* These are created by the cprop pass so they use inst_imm as the source */
2247                         x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2248                         break;
2249                 case OP_LOADU1_MEM:
2250                         x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2251                         break;
2252                 case OP_LOADU2_MEM:
2253                         x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2254                         break;
2255                 case OP_LOAD_MEMBASE:
2256                 case OP_LOADI4_MEMBASE:
2257                 case OP_LOADU4_MEMBASE:
2258                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2259                         break;
2260                 case OP_LOADU1_MEMBASE:
2261                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2262                         break;
2263                 case OP_LOADI1_MEMBASE:
2264                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2265                         break;
2266                 case OP_LOADU2_MEMBASE:
2267                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2268                         break;
2269                 case OP_LOADI2_MEMBASE:
2270                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2271                         break;
2272                 case OP_ICONV_TO_I1:
2273                 case OP_SEXT_I1:
2274                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2275                         break;
2276                 case OP_ICONV_TO_I2:
2277                 case OP_SEXT_I2:
2278                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2279                         break;
2280                 case OP_ICONV_TO_U1:
2281                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2282                         break;
2283                 case OP_ICONV_TO_U2:
2284                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2285                         break;
2286                 case OP_COMPARE:
2287                 case OP_ICOMPARE:
2288                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2289                         break;
2290                 case OP_COMPARE_IMM:
2291                 case OP_ICOMPARE_IMM:
2292                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2293                         break;
2294                 case OP_X86_COMPARE_MEMBASE_REG:
2295                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2296                         break;
2297                 case OP_X86_COMPARE_MEMBASE_IMM:
2298                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2299                         break;
2300                 case OP_X86_COMPARE_MEMBASE8_IMM:
2301                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2302                         break;
2303                 case OP_X86_COMPARE_REG_MEMBASE:
2304                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2305                         break;
2306                 case OP_X86_COMPARE_MEM_IMM:
2307                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2308                         break;
2309                 case OP_X86_TEST_NULL:
2310                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2311                         break;
2312                 case OP_X86_ADD_MEMBASE_IMM:
2313                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2314                         break;
2315                 case OP_X86_ADD_REG_MEMBASE:
2316                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2317                         break;
2318                 case OP_X86_SUB_MEMBASE_IMM:
2319                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2320                         break;
2321                 case OP_X86_SUB_REG_MEMBASE:
2322                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2323                         break;
2324                 case OP_X86_AND_MEMBASE_IMM:
2325                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2326                         break;
2327                 case OP_X86_OR_MEMBASE_IMM:
2328                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2329                         break;
2330                 case OP_X86_XOR_MEMBASE_IMM:
2331                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2332                         break;
2333                 case OP_X86_ADD_MEMBASE_REG:
2334                         x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2335                         break;
2336                 case OP_X86_SUB_MEMBASE_REG:
2337                         x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2338                         break;
2339                 case OP_X86_AND_MEMBASE_REG:
2340                         x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2341                         break;
2342                 case OP_X86_OR_MEMBASE_REG:
2343                         x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2344                         break;
2345                 case OP_X86_XOR_MEMBASE_REG:
2346                         x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2347                         break;
2348                 case OP_X86_INC_MEMBASE:
2349                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2350                         break;
2351                 case OP_X86_INC_REG:
2352                         x86_inc_reg (code, ins->dreg);
2353                         break;
2354                 case OP_X86_DEC_MEMBASE:
2355                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2356                         break;
2357                 case OP_X86_DEC_REG:
2358                         x86_dec_reg (code, ins->dreg);
2359                         break;
2360                 case OP_X86_MUL_REG_MEMBASE:
2361                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2362                         break;
2363                 case OP_X86_AND_REG_MEMBASE:
2364                         x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2365                         break;
2366                 case OP_X86_OR_REG_MEMBASE:
2367                         x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2368                         break;
2369                 case OP_X86_XOR_REG_MEMBASE:
2370                         x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2371                         break;
2372                 case OP_BREAK:
2373                         x86_breakpoint (code);
2374                         break;
2375                 case OP_RELAXED_NOP:
2376                         x86_prefix (code, X86_REP_PREFIX);
2377                         x86_nop (code);
2378                         break;
2379                 case OP_HARD_NOP:
2380                         x86_nop (code);
2381                         break;
2382                 case OP_NOP:
2383                 case OP_DUMMY_USE:
2384                 case OP_DUMMY_STORE:
2385                 case OP_NOT_REACHED:
2386                 case OP_NOT_NULL:
2387                         break;
2388                 case OP_ADDCC:
2389                 case OP_IADDCC:
2390                 case OP_IADD:
2391                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2392                         break;
2393                 case OP_ADC:
2394                 case OP_IADC:
2395                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2396                         break;
2397                 case OP_ADDCC_IMM:
2398                 case OP_ADD_IMM:
2399                 case OP_IADD_IMM:
2400                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2401                         break;
2402                 case OP_ADC_IMM:
2403                 case OP_IADC_IMM:
2404                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2405                         break;
2406                 case OP_SUBCC:
2407                 case OP_ISUBCC:
2408                 case OP_ISUB:
2409                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2410                         break;
2411                 case OP_SBB:
2412                 case OP_ISBB:
2413                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2414                         break;
2415                 case OP_SUBCC_IMM:
2416                 case OP_SUB_IMM:
2417                 case OP_ISUB_IMM:
2418                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2419                         break;
2420                 case OP_SBB_IMM:
2421                 case OP_ISBB_IMM:
2422                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2423                         break;
2424                 case OP_IAND:
2425                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2426                         break;
2427                 case OP_AND_IMM:
2428                 case OP_IAND_IMM:
2429                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2430                         break;
2431                 case OP_IDIV:
2432                 case OP_IREM:
2433                         /* 
2434                          * The code is the same for div/rem, the allocator will allocate dreg
2435                          * to EAX/EDX as appropriate.
2436                          */
2437                         if (ins->sreg2 == X86_EDX) {
2438                                 /* cdq clobbers this */
2439                                 x86_push_reg (code, ins->sreg2);
2440                                 x86_cdq (code);
2441                                 x86_div_membase (code, X86_ESP, 0, TRUE);
2442                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
2443                         } else {
2444                                 x86_cdq (code);
2445                                 x86_div_reg (code, ins->sreg2, TRUE);
2446                         }
2447                         break;
2448                 case OP_IDIV_UN:
2449                 case OP_IREM_UN:
2450                         if (ins->sreg2 == X86_EDX) {
2451                                 x86_push_reg (code, ins->sreg2);
2452                                 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2453                                 x86_div_membase (code, X86_ESP, 0, FALSE);
2454                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
2455                         } else {
2456                                 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2457                                 x86_div_reg (code, ins->sreg2, FALSE);
2458                         }
2459                         break;
2460                 case OP_DIV_IMM:
2461                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2462                         x86_cdq (code);
2463                         x86_div_reg (code, ins->sreg2, TRUE);
2464                         break;
2465                 case OP_IREM_IMM: {
2466                         int power = mono_is_power_of_two (ins->inst_imm);
2467
2468                         g_assert (ins->sreg1 == X86_EAX);
2469                         g_assert (ins->dreg == X86_EAX);
2470                         g_assert (power >= 0);
2471
2472                         if (power == 1) {
2473                                 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2474                                 x86_cdq (code);
2475                                 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
2476                                 /* 
2477                          * If the dividend is >= 0, this does nothing. If it is negative,
2478                          * it transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
2479                                  */
2480                                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2481                                 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2482                         } else {
2483                                 /* Based on gcc code */
2484
2485                         /* Add compensation for negative dividends */
2486                                 x86_cdq (code);
2487                                 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2488                                 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2489                                 /* Compute remainder */
2490                                 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2491                                 /* Remove compensation */
2492                                 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2493                         }
2494                         break;
2495                 }
2496                 case OP_IOR:
2497                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2498                         break;
2499                 case OP_OR_IMM:
2500                 case OP_IOR_IMM:
2501                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2502                         break;
2503                 case OP_IXOR:
2504                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2505                         break;
2506                 case OP_XOR_IMM:
2507                 case OP_IXOR_IMM:
2508                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2509                         break;
2510                 case OP_ISHL:
2511                         g_assert (ins->sreg2 == X86_ECX);
2512                         x86_shift_reg (code, X86_SHL, ins->dreg);
2513                         break;
2514                 case OP_ISHR:
2515                         g_assert (ins->sreg2 == X86_ECX);
2516                         x86_shift_reg (code, X86_SAR, ins->dreg);
2517                         break;
2518                 case OP_SHR_IMM:
2519                 case OP_ISHR_IMM:
2520                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2521                         break;
2522                 case OP_SHR_UN_IMM:
2523                 case OP_ISHR_UN_IMM:
2524                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2525                         break;
2526                 case OP_ISHR_UN:
2527                         g_assert (ins->sreg2 == X86_ECX);
2528                         x86_shift_reg (code, X86_SHR, ins->dreg);
2529                         break;
2530                 case OP_SHL_IMM:
2531                 case OP_ISHL_IMM:
2532                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2533                         break;
2534                 case OP_LSHL: {
2535                         guint8 *jump_to_end;
2536
2537                         /* handle shifts below 32 bits */
2538                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2539                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2540
2541                         x86_test_reg_imm (code, X86_ECX, 32);
2542                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2543
2544                         /* handle shift over 32 bit */
2545                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2546                         x86_clear_reg (code, ins->sreg1);
2547                         
2548                         x86_patch (jump_to_end, code);
2549                         }
2550                         break;
2551                 case OP_LSHR: {
2552                         guint8 *jump_to_end;
2553
2554                         /* handle shifts below 32 bits */
2555                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2556                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2557
2558                         x86_test_reg_imm (code, X86_ECX, 32);
2559                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2560
2561                         /* handle shifts over 31 bits */
2562                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2563                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2564                         
2565                         x86_patch (jump_to_end, code);
2566                         }
2567                         break;
2568                 case OP_LSHR_UN: {
2569                         guint8 *jump_to_end;
2570
2571                         /* handle shifts below 32 bits */
2572                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2573                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2574
2575                         x86_test_reg_imm (code, X86_ECX, 32);
2576                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2577
2578                         /* handle shifts over 31 bits */
2579                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2580                         x86_clear_reg (code, ins->backend.reg3);
2581                         
2582                         x86_patch (jump_to_end, code);
2583                         }
2584                         break;
2585                 case OP_LSHL_IMM:
2586                         if (ins->inst_imm >= 32) {
2587                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2588                                 x86_clear_reg (code, ins->sreg1);
2589                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2590                         } else {
2591                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2592                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2593                         }
2594                         break;
2595                 case OP_LSHR_IMM:
2596                         if (ins->inst_imm >= 32) {
2597                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2598                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2599                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2600                         } else {
2601                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2602                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2603                         }
2604                         break;
2605                 case OP_LSHR_UN_IMM:
2606                         if (ins->inst_imm >= 32) {
2607                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2608                                 x86_clear_reg (code, ins->backend.reg3);
2609                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2610                         } else {
2611                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2612                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2613                         }
2614                         break;
2615                 case OP_INOT:
2616                         x86_not_reg (code, ins->sreg1);
2617                         break;
2618                 case OP_INEG:
2619                         x86_neg_reg (code, ins->sreg1);
2620                         break;
2621
2622                 case OP_IMUL:
2623                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2624                         break;
2625                 case OP_MUL_IMM:
2626                 case OP_IMUL_IMM:
2627                         switch (ins->inst_imm) {
2628                         case 2:
2629                                 /* MOV r1, r2 */
2630                                 /* ADD r1, r1 */
2631                                 if (ins->dreg != ins->sreg1)
2632                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2633                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2634                                 break;
2635                         case 3:
2636                                 /* LEA r1, [r2 + r2*2] */
2637                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2638                                 break;
2639                         case 5:
2640                                 /* LEA r1, [r2 + r2*4] */
2641                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2642                                 break;
2643                         case 6:
2644                                 /* LEA r1, [r2 + r2*2] */
2645                                 /* ADD r1, r1          */
2646                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2647                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2648                                 break;
2649                         case 9:
2650                                 /* LEA r1, [r2 + r2*8] */
2651                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2652                                 break;
2653                         case 10:
2654                                 /* LEA r1, [r2 + r2*4] */
2655                                 /* ADD r1, r1          */
2656                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2657                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2658                                 break;
2659                         case 12:
2660                                 /* LEA r1, [r2 + r2*2] */
2661                                 /* SHL r1, 2           */
2662                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2663                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2664                                 break;
2665                         case 25:
2666                                 /* LEA r1, [r2 + r2*4] */
2667                                 /* LEA r1, [r1 + r1*4] */
2668                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2669                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2670                                 break;
2671                         case 100:
2672                                 /* LEA r1, [r2 + r2*4] */
2673                                 /* SHL r1, 2           */
2674                                 /* LEA r1, [r1 + r1*4] */
2675                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2676                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2677                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2678                                 break;
2679                         default:
2680                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2681                                 break;
2682                         }
2683                         break;
2684                 case OP_IMUL_OVF:
2685                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2686                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2687                         break;
2688                 case OP_IMUL_OVF_UN: {
2689                         /* the mul operation and the exception check should most likely be split */
2690                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2691                         /*g_assert (ins->sreg2 == X86_EAX);
2692                         g_assert (ins->dreg == X86_EAX);*/
2693                         if (ins->sreg2 == X86_EAX) {
2694                                 non_eax_reg = ins->sreg1;
2695                         } else if (ins->sreg1 == X86_EAX) {
2696                                 non_eax_reg = ins->sreg2;
2697                         } else {
2698                                 /* no need to save since we're going to store to it anyway */
2699                                 if (ins->dreg != X86_EAX) {
2700                                         saved_eax = TRUE;
2701                                         x86_push_reg (code, X86_EAX);
2702                                 }
2703                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2704                                 non_eax_reg = ins->sreg2;
2705                         }
2706                         if (ins->dreg == X86_EDX) {
2707                                 if (!saved_eax) {
2708                                         saved_eax = TRUE;
2709                                         x86_push_reg (code, X86_EAX);
2710                                 }
2711                         } else if (ins->dreg != X86_EAX) {
2712                                 saved_edx = TRUE;
2713                                 x86_push_reg (code, X86_EDX);
2714                         }
2715                         x86_mul_reg (code, non_eax_reg, FALSE);
2716                         /* save before the check since pop and mov don't change the flags */
2717                         if (ins->dreg != X86_EAX)
2718                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2719                         if (saved_edx)
2720                                 x86_pop_reg (code, X86_EDX);
2721                         if (saved_eax)
2722                                 x86_pop_reg (code, X86_EAX);
2723                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2724                         break;
2725                 }
2726                 case OP_ICONST:
2727                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2728                         break;
2729                 case OP_AOTCONST:
2730                         g_assert_not_reached ();
2731                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2732                         x86_mov_reg_imm (code, ins->dreg, 0);
2733                         break;
2734                 case OP_JUMP_TABLE:
2735                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2736                         x86_mov_reg_imm (code, ins->dreg, 0);
2737                         break;
2738                 case OP_LOAD_GOTADDR:
2739                         x86_call_imm (code, 0);
2740                         /* 
2741                          * The patch needs to point to the pop, since the GOT offset needs 
2742                          * to be added to that address.
2743                          */
2744                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2745                         x86_pop_reg (code, ins->dreg);
2746                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2747                         break;
2748                 case OP_GOT_ENTRY:
2749                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2750                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2751                         break;
2752                 case OP_X86_PUSH_GOT_ENTRY:
2753                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2754                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2755                         break;
2756                 case OP_MOVE:
2757                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2758                         break;
2759                 case OP_JMP: {
2760                         /*
2761                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2762                          * Keep in sync with the code in emit_epilog.
2763                          */
2764                         int pos = 0;
2765
2766                         /* FIXME: no tracing support... */
2767                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2768                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2769                         /* reset offset to make max_len work */
2770                         offset = code - cfg->native_code;
2771
2772                         g_assert (!cfg->method->save_lmf);
2773
2774                         code = emit_load_volatile_arguments (cfg, code);
2775
2776                         if (cfg->used_int_regs & (1 << X86_EBX))
2777                                 pos -= 4;
2778                         if (cfg->used_int_regs & (1 << X86_EDI))
2779                                 pos -= 4;
2780                         if (cfg->used_int_regs & (1 << X86_ESI))
2781                                 pos -= 4;
2782                         if (pos)
2783                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2784         
2785                         if (cfg->used_int_regs & (1 << X86_ESI))
2786                                 x86_pop_reg (code, X86_ESI);
2787                         if (cfg->used_int_regs & (1 << X86_EDI))
2788                                 x86_pop_reg (code, X86_EDI);
2789                         if (cfg->used_int_regs & (1 << X86_EBX))
2790                                 x86_pop_reg (code, X86_EBX);
2791         
2792                         /* restore ESP/EBP */
2793                         x86_leave (code);
2794                         offset = code - cfg->native_code;
2795                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2796                         x86_jump32 (code, 0);
2797
2798                         cfg->disable_aot = TRUE;
2799                         break;
2800                 }
		case OP_CHECK_THIS:
			/* ensure ins->sreg1 is not NULL: a load through the pointer
			 * faults if it is NULL.
			 * note that cmp DWORD PTR [eax], eax is one byte shorter than
			 * cmp DWORD PTR [eax], 0
			 */
			x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
			break;
		case OP_ARGLIST: {
			/* Store the address of the signature cookie (EBP + cfg->sig_cookie)
			 * into the location pointed to by sreg1. A scratch register is
			 * needed for the lea; pick one that is not sreg1 and preserve its
			 * value with a push/pop pair. */
			int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
			x86_push_reg (code, hreg);
			x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
			x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
			x86_pop_reg (code, hreg);
			break;
		}
		case OP_FCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VCALL2:
		case OP_VOIDCALL:
		case OP_CALL:
			/* Direct call: emit a patchable call either to a managed method
			 * or to an absolute native address. */
			call = (MonoCallInst*)ins;
			if (ins->flags & MONO_INST_HAS_METHOD)
				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
			/* Caller pops the arguments, unless the callee is stdcall
			 * (callee-pops convention). */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				/* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
				 * bytes to pop, we want to use pops. GCC does this (note it won't happen
				 * for P4 or i686 because gcc will avoid using pop push at all). But we aren't
				 * smart enough to do that optimization yet
				 *
				 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
				 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
				 * win (most likely from locality benefits). People with other processors should
				 * check on theirs to see what happens.
				 */
				if (call->stack_usage == 4) {
					/* we want to use registers that won't get used soon, so use
					 * ecx, as eax will get allocated first. edx is used by long calls,
					 * so we can't use that.
					 */
					
					x86_pop_reg (code, X86_ECX);
				} else {
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
				}
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VCALL2_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			/* Indirect call through a register; same caller-pops cleanup as
			 * the direct-call case above. */
			call = (MonoCallInst*)ins;
			x86_call_reg (code, ins->sreg1);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VCALL2_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			/* Indirect call through [sreg1 + inst_offset] (e.g. a vtable
			 * slot); same caller-pops cleanup as above. */
			call = (MonoCallInst*)ins;
			x86_call_membase (code, ins->sreg1, ins->inst_offset);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_OUTARG:
		case OP_X86_PUSH:
			/* Push a register argument. */
			x86_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			/* Push an immediate argument. */
			x86_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			/* Push a value loaded from [basereg + offset]. */
			x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: 
			/* Push a valuetype of inst_imm bytes (a multiple of 4,
			 * given the >> 2 below): make room on the stack, then
			 * copy it there with rep movsd. EDI/ESI/ECX are clobbered
			 * by the string op, so save and restore them around it. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
			x86_push_reg (code, X86_EDI);
			x86_push_reg (code, X86_ESI);
			x86_push_reg (code, X86_ECX);
			/* ESI = source address of the valuetype */
			if (ins->inst_offset)
				x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
			else
				x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
			/* EDI = destination: skip the 3 saved registers (12 bytes) */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			/* ECX = number of 4-byte words to copy */
			x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_movsd (code);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_ESI);
			x86_pop_reg (code, X86_EDI);
			break;
		case OP_X86_LEA:
			/* dreg = sreg1 + inst_imm + (sreg2 << shift_amount) */
			x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
			break;
		case OP_X86_LEA_MEMBASE:
			/* dreg = sreg1 + inst_imm */
			x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_LOCALLOC:
			/* Dynamic stack allocation (localloc) of sreg1 bytes; the
			 * resulting pointer (== new ESP) goes into dreg. */
			/* keep alignment: round sreg1 up to MONO_ARCH_LOCALLOC_ALIGNMENT */
			x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
			x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
			code = mono_emit_stack_alloc (code, ins);
			x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
			break;
		case OP_LOCALLOC_IMM: {
			/* localloc with a compile-time-constant size, rounded up to the
			 * frame alignment. */
			guint32 size = ins->inst_imm;
			size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);

			if (ins->flags & MONO_INST_INIT) {
				/* Zero-initialization requested: route through
				 * mono_emit_stack_alloc, which needs the size in a register
				 * (reuse dreg as the size input). */
				/* FIXME: Optimize this */
				x86_mov_reg_imm (code, ins->dreg, size);
				ins->sreg1 = ins->dreg;

				code = mono_emit_stack_alloc (code, ins);
				x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
			} else {
				/* No init: just adjust ESP directly. */
				x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
				x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
			}
			break;
		}
		case OP_THROW: {
			/* Throw the exception object in sreg1 via the arch throw
			 * trampoline (takes the exception as a stack argument). */
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_throw_exception");
			break;
		}
		case OP_RETHROW: {
			/* Same as OP_THROW but preserves the original stack trace. */
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_rethrow_exception");
			break;
		}
		case OP_CALL_HANDLER:
			/* Call a finally/filter handler block. The sub/add pair keeps
			 * ESP at the required frame alignment across the call (the call
			 * itself pushes 4 bytes of return address). The call target is
			 * patched to the handler basic block. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			x86_call_imm (code, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
			break;
		case OP_START_HANDLER: {
			/* Entry of a handler region: save ESP into the region's spvar so
			 * ENDFINALLY/ENDFILTER can restore it before returning. */
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
			break;
		}
		case OP_ENDFINALLY: {
			/* Restore the ESP saved by OP_START_HANDLER and return to the
			 * address pushed by OP_CALL_HANDLER. */
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
			x86_ret (code);
			break;
		}
		case OP_ENDFILTER: {
			/* Like OP_ENDFINALLY, but the filter's result is returned. */
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
			/* The local allocator will put the result into EAX */
			x86_ret (code);
			break;
		}
2980
		case OP_LABEL:
			/* Record the native offset of this label for later branches. */
			ins->inst_c0 = code - cfg->native_code;
			break;
		case OP_BR:
			/* Unconditional branch, either to a label (MONO_INST_BRLABEL)
			 * or to a basic block. If the target is already emitted, jump
			 * straight to it; otherwise register a patch and pick a short
			 * (8-bit) or long (32-bit) jump based on the estimated distance. */
			if (ins->flags & MONO_INST_BRLABEL) {
				if (ins->inst_i0->inst_c0) {
					/* target label already has a native offset */
					x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				}
			} else {
				if (ins->inst_target_bb->native_offset) {
					/* target block already emitted */
					x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				} 
			}
			break;
		case OP_BR_REG:
			/* Indirect branch through a register. */
			x86_jump_reg (code, ins->sreg1);
			break;
		case OP_CEQ:
		case OP_CLT:
		case OP_CLT_UN:
		case OP_CGT:
		case OP_CGT_UN:
		case OP_CNE:
		case OP_ICEQ:
		case OP_ICLT:
		case OP_ICLT_UN:
		case OP_ICGT:
		case OP_ICGT_UN:
			/* Materialize a comparison result as 0/1: setcc into the byte
			 * register, then zero-extend it to 32 bits. cc_table maps the
			 * opcode's condition, cc_signed_table its signedness. */
			x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_COND_EXC_EQ:
		case OP_COND_EXC_NE_UN:
		case OP_COND_EXC_LT:
		case OP_COND_EXC_LT_UN:
		case OP_COND_EXC_GT:
		case OP_COND_EXC_GT_UN:
		case OP_COND_EXC_GE:
		case OP_COND_EXC_GE_UN:
		case OP_COND_EXC_LE:
		case OP_COND_EXC_LE_UN:
		case OP_COND_EXC_IEQ:
		case OP_COND_EXC_INE_UN:
		case OP_COND_EXC_ILT:
		case OP_COND_EXC_ILT_UN:
		case OP_COND_EXC_IGT:
		case OP_COND_EXC_IGT_UN:
		case OP_COND_EXC_IGE:
		case OP_COND_EXC_IGE_UN:
		case OP_COND_EXC_ILE:
		case OP_COND_EXC_ILE_UN:
			/* Throw the exception named by inst_p1 if the condition holds. */
			EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
			break;
		case OP_COND_EXC_OV:
		case OP_COND_EXC_NO:
		case OP_COND_EXC_C:
		case OP_COND_EXC_NC:
			/* Overflow/carry flag variants; condition looked up relative to
			 * OP_COND_EXC_EQ in branch_cc_table. */
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
			break;
		case OP_COND_EXC_IOV:
		case OP_COND_EXC_INO:
		case OP_COND_EXC_IC:
		case OP_COND_EXC_INC:
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
			break;
		case OP_IBEQ:
		case OP_IBNE_UN:
		case OP_IBLT:
		case OP_IBLT_UN:
		case OP_IBGT:
		case OP_IBGT_UN:
		case OP_IBGE:
		case OP_IBGE_UN:
		case OP_IBLE:
		case OP_IBLE_UN:
			/* Conditional branch on a previously-set flags state. */
			EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			break;

		case OP_CMOV_IEQ:
		case OP_CMOV_IGE:
		case OP_CMOV_IGT:
		case OP_CMOV_ILE:
		case OP_CMOV_ILT:
		case OP_CMOV_INE_UN:
		case OP_CMOV_IGE_UN:
		case OP_CMOV_IGT_UN:
		case OP_CMOV_ILE_UN:
		case OP_CMOV_ILT_UN:
			/* Conditional move: dreg = cond ? sreg2 : dreg. The register
			 * allocator must have coalesced dreg with sreg1. */
			g_assert (ins->dreg == ins->sreg1);
			x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
			break;
3086
		/* floating point opcodes */
		case OP_R8CONST: {
			/* Load a double constant onto the x87 stack. +0.0 and 1.0 have
			 * dedicated one-byte instructions (the signbit check keeps -0.0
			 * out of the fldz fast path). */
			double d = *(double *)ins->inst_p0;

			if ((d == 0.0) && (mono_signbit (d) == 0)) {
				x86_fldz (code);
			} else if (d == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					/* AOT code can't reference runtime data, so push the
					 * raw 8 bytes onto the stack and load from there. */
					guint32 *val = (guint32*)&d;
					x86_push_imm (code, val [1]);
					x86_push_imm (code, val [0]);
					x86_fld_membase (code, X86_ESP, 0, TRUE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
				}
				else {
					/* JIT: load from the constant's address, fixed up later. */
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
					x86_fld (code, NULL, TRUE);
				}
			}
			break;
		}
		case OP_R4CONST: {
			/* Same scheme as OP_R8CONST but for a 4-byte float constant. */
			float f = *(float *)ins->inst_p0;

			if ((f == 0.0) && (mono_signbit (f) == 0)) {
				x86_fldz (code);
			} else if (f == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					guint32 val = *(guint32*)&f;
					x86_push_imm (code, val);
					x86_fld_membase (code, X86_ESP, 0, FALSE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
				}
				else {
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
					x86_fld (code, NULL, FALSE);
				}
			}
			break;
		}
		case OP_STORER8_MEMBASE_REG:
			/* Store (and pop) the top of the x87 stack as a double. */
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
			break;
		case OP_LOADR8_SPILL_MEMBASE:
			/* Reload a spilled double and swap it below the current top of
			 * the fp stack. */
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			x86_fxch (code, 1);
			break;
		case OP_LOADR8_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STORER4_MEMBASE_REG:
			/* Store (and pop) the top of the x87 stack as a float. */
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
			break;
		case OP_LOADR4_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case OP_ICONV_TO_R4: /* FIXME: change precision */
		case OP_ICONV_TO_R8:
			/* Signed int -> fp: fild has no register operand, so bounce the
			 * value through the stack. */
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, FALSE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_ICONV_TO_R_UN:
			/* Unsigned int -> fp: widen to a 64-bit value with a zero high
			 * word and load it with a 64-bit fild, which treats the whole
			 * quantity as a non-negative integer. */
			x86_push_imm (code, 0);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
			break;
		case OP_X86_FP_LOAD_I8:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_X86_FP_LOAD_I4:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case OP_FCONV_TO_R4:
			/* FIXME: nothing to do ?? */
			break;
		case OP_FCONV_TO_I1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
			break;
		case OP_FCONV_TO_U1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
			break;
		case OP_FCONV_TO_I2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
			break;
		case OP_FCONV_TO_U2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
			break;
		case OP_FCONV_TO_I4:
		case OP_FCONV_TO_I:
			code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
			break;
		case OP_FCONV_TO_I8:
			/* Double -> int64 with C truncation semantics: save the x87
			 * control word, set the rounding-control bits (0xc00 = round
			 * toward zero), fistp the 8-byte result, pop it into
			 * dreg (low word) and backend.reg3 (high word), then restore
			 * the original control word. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
			x86_fnstcw_membase(code, X86_ESP, 0);
			x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
			x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
			x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
			x86_fldcw_membase (code, X86_ESP, 2);
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
			x86_pop_reg (code, ins->dreg);
			x86_pop_reg (code, ins->backend.reg3);
			/* restore the saved control word */
			x86_fldcw_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_LCONV_TO_R8_2:
			/* Signed int64 (sreg1 = low, sreg2 = high) -> double via a
			 * 64-bit fild from the stack. */
			x86_push_reg (code, ins->sreg2);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
			break;
		case OP_LCONV_TO_R4_2:
			/* Signed int64 -> float: load as above, then round-trip through
			 * a 4-byte store/load to drop the extra precision. */
			x86_push_reg (code, ins->sreg2);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			/* Change precision */
			x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
			x86_fld_membase (code, X86_ESP, 0, FALSE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
			break;
		case OP_LCONV_TO_R_UN:
		case OP_LCONV_TO_R_UN_2: { 
			/* mn is 2^64 encoded as an 80-bit x87 extended-precision value
			 * (significand 0x8000000000000000, biased exponent 0x403f). */
			static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
			guint8 *br;

			/* load 64bit integer to FP stack */
			x86_push_imm (code, 0);
			x86_push_reg (code, ins->sreg2);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			/* store as 80bit FP value */
			x86_fst80_membase (code, X86_ESP, 0);
			
			/* test if lreg is negative: fild interpreted the bits as signed,
			 * so a set sign bit means the value came out 2^64 too small */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
	
			/* add correction constant mn (2^64) */
			x86_fld80_mem (code, mn);
			x86_fld80_membase (code, X86_ESP, 0);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_fst80_membase (code, X86_ESP, 0);

			x86_patch (br, code);

			/* reload the (possibly corrected) result and clean the stack */
			x86_fld80_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);

			break;
		}
		case OP_LCONV_TO_OVF_I:
		case OP_LCONV_TO_OVF_I4_2: {
			/* Checked int64 -> int32 conversion: throw OverflowException
			 * unless the 64-bit value fits in a signed 32-bit int. */
			guint8 *br [3], *label [1];
			MonoInst *tins;

			/* 
			 * Valid ints: 0xFFFFFFFF:0x80000000 to 0x00000000:0x7FFFFFFF
			 * (i.e. the high word must be the sign extension of the low word)
			 */
			x86_test_reg_reg (code, ins->sreg1, ins->sreg1);

			/* If the low word top bit is set, see if we are negative */
			br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
			/* We are not negative (no top bit set), check for our top word to be zero */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
			label [0] = code;

			/* throw exception: branch straight to an existing handler block
			 * if the optimizer found one, otherwise emit a patchable jump to
			 * the exception trampoline */
			tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
			if (tins) {
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
				if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
					x86_jump8 (code, 0);
				else
					x86_jump32 (code, 0);
			} else {
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
				x86_jump32 (code, 0);
			}
	
	
			x86_patch (br [0], code);
			/* our top bit is set, check that top word is 0xffffffff */
			x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
		
			x86_patch (br [1], code);
			/* nope, emit exception */
			br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
			x86_patch (br [2], label [0]);

			/* in range: the low word is the result */
			if (ins->dreg != ins->sreg1)
				x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
			break;
		}
		case OP_FMOVE:
			/* Not needed on the fp stack */
			break;
		case OP_FADD:
			/* st(1) = st(1) op st(0); pop — operands live on the x87 stack */
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;
		case OP_FSUB:
			x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
			break;		
		case OP_FMUL:
			x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
			break;		
		case OP_FDIV:
			x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
			break;		
		case OP_FNEG:
			x86_fchs (code);
			break;		
		case OP_SIN:
			x86_fsin (code);
			/* NOTE(review): the fldz + fadd-with-pop pair leaves the value
			 * numerically unchanged — presumably emitted to normalize the
			 * result/fp-stack state; confirm before removing. */
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_COS:
			x86_fcos (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_ABS:
			x86_fabs (code);
			break;		
		case OP_TAN: {
			/* 
			 * it really doesn't make sense to inline all this code,
			 * it's here just to show that things may not be as simple 
			 * as they appear.
			 */
			/* fptan pushes tan(st0) then 1.0; it sets the C2 status flag when
			 * the operand is out of range (|x| >= 2^63), in which case the
			 * argument is reduced modulo 2*pi with fprem1 and fptan retried. */
			guchar *check_pos, *end_tan, *pop_jump;
			x86_push_reg (code, X86_EAX);
			x86_fptan (code);
			x86_fnstsw (code);
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			check_pos = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 0); /* pop the 1.0 */
			end_tan = code;
			x86_jump8 (code, 0);
			/* out-of-range path: compute 2*pi and reduce the argument */
			x86_fldpi (code);
			x86_fp_op (code, X86_FADD, 0);
			x86_fxch (code, 1);
			x86_fprem1 (code);
			x86_fstsw (code);
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			pop_jump = code;
			/* C2 still set: fprem1 returned a partial remainder, loop again */
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 1);
			x86_fptan (code);
			x86_patch (pop_jump, code);
			x86_fstp (code, 0); /* pop the 1.0 */
			x86_patch (check_pos, code);
			x86_patch (end_tan, code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_ATAN:
			/* atan(x) = fpatan(x, 1.0) */
			x86_fld1 (code);
			x86_fpatan (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_SQRT:
			x86_fsqrt (code);
			break;
		case OP_IMIN:
			/* dreg = min(sreg1, sreg2), signed; implemented as cmp + cmov,
			 * so the CMOV optimization must be enabled and the allocator
			 * must have coalesced dreg with sreg1. */
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
			break;
		case OP_IMIN_UN:
			/* unsigned min (FALSE selects the unsigned condition code) */
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
			break;
		case OP_IMAX:
			/* dreg = max(sreg1, sreg2), signed */
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
			break;
		case OP_IMAX_UN:
			/* unsigned max */
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
			break;
		case OP_X86_FPOP:
			/* discard the top of the x87 stack */
			x86_fstp (code, 0);
			break;
		case OP_X86_FXCH:
			/* exchange st(0) with st(inst_imm) */
			x86_fxch (code, ins->inst_imm);
			break;
		case OP_FREM: {
			guint8 *l1, *l2;

			/* EAX is saved because fnstsw below clobbers AX */
			x86_push_reg (code, X86_EAX);
			/* we need to exchange ST(0) with ST(1) */
			x86_fxch (code, 1);

			/* this requires a loop, because fprem sometimes 
			 * returns a partial remainder */
			l1 = code;
			/* looks like MS is using fprem instead of the IEEE compatible fprem1 */
			/* x86_fprem1 (code); */
			x86_fprem (code);
			x86_fnstsw (code);
			/* C2 set in the FPU status word means 'reduction incomplete' */
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
			/* the 2-byte jcc displacement is relative to the end of the
			 * branch instruction, hence code + 2 */
			l2 = code + 2;
			x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);

			/* pop result */
			x86_fstp (code, 1);

			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_FCOMPARE:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* fcomip compares ST(0) with ST(1) and sets EFLAGS
				 * directly; pop the remaining operand afterwards */
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				break;
			}
			/* this overwrites EAX */
			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			break;
		case OP_FCEQ:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				/* PF set after fcomip means the operands were unordered
				 * (NaN): skip the sete so dreg stays 0 */
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
				x86_patch (unordered_check, code);
				break;
			}
			/* non-fcmov path clobbers EAX, so preserve it unless it is
			 * also the destination */
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			/* 0x4000 is the C3 status flag: set alone iff ST(0) == ST(1) */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCLT:
		case OP_FCLT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCLT_UN) {
					/* unordered (PF set) must yield TRUE for the _UN
					 * variant, so branch around the setcc and force 1 */
					guchar *unordered_check = code;
					guchar *jump_to_end;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
					jump_to_end = code;
					x86_jump8 (code, 0);
					x86_patch (unordered_check, code);
					x86_inc_reg (code, ins->dreg);
					x86_patch (jump_to_end, code);
				} else {
					/* fcomip compared ST(0) (2nd operand) against ST(1)
					 * (1st operand), so 'less than' reads as CC_GT here */
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
				}
				break;
			}
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			if (ins->opcode == OP_FCLT_UN) {
				/* if the masked flags are non-zero, also accept the
				 * all-set (unordered) pattern as TRUE */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCGT:
		case OP_FCGT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCGT) {
					/* ordered variant: NaN (PF set) must yield 0, so
					 * skip the setcc on unordered */
					unordered_check = code;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
					x86_patch (unordered_check, code);
				} else {
					/* _UN variant: CC_LT already reads TRUE when the
					 * compare is unordered */
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
				}
				break;
			}
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			/* C0 alone means ST(0) > source operand */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			if (ins->opcode == OP_FCGT_UN) {
				/* also accept the all-set (unordered) flag pattern */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FBEQ:
			/* FP conditional branches: the FCMOV paths consume EFLAGS left
			 * by a preceding fcomip; the legacy paths test the x87 status
			 * bits (C0/C2/C3) that a preceding FCOMPARE left in EAX. */
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* PF set == unordered: fall through instead of branching */
				guchar *jump = code;
				x86_branch8 (code, X86_CC_P, 0, TRUE);
				EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
				x86_patch (jump, code);
				break;
			}
			/* 0x4000 == C3, set iff equal */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
			break;
		case OP_FBNE_UN:
			/* Branch if C013 != 100 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* branch if !ZF or (PF|CF) */
				EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLT:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* operand order after fcomip inverts the sense: CC_GT
				 * here corresponds to 'first operand less than second' */
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBLT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* unordered (PF) also takes the branch for _UN */
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			/* NOTE(review): this check is always true inside this case;
			 * looks like a leftover from when FBLT/FBLT_UN shared a body */
			if (ins->opcode == OP_FBLT_UN) {
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGT:
		case OP_FBGT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				if (ins->opcode == OP_FBGT) {
					guchar *br1;

					/* skip branch if C1=1 */
					br1 = code;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					/* branch if (C0 | C3) = 1 */
					EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
					x86_patch (br1, code);
				} else {
					EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
				}
				break;
			}
			/* C0 alone means 'greater than' in the x87 status word */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			if (ins->opcode == OP_FBGT_UN) {
				/* also branch on the all-set (unordered) pattern */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGE:
			/* Branch if C013 == 100 or 001 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if (C0 | C3) = 1 */
				EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
				x86_patch (br1, code);
				break;
			}
			/* branch on either 'greater' (C0) or 'equal' (C3) */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBGE_UN:
			/* Branch if C013 == 000 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLE:
			/* Branch if C013=000 or 100 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if C0=0 */
				EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
				x86_patch (br1, code);
				break;
			}
			x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
		case OP_FBLE_UN:
			/* Branch if C013 != 001 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_CKFINITE: {
			/* Throw ArithmeticException if ST(0) is NaN or infinity,
			 * otherwise leave the value on the fp stack untouched. */
			guchar *br1;
			/* fnstsw clobbers AX, so preserve EAX around the check */
			x86_push_reg (code, X86_EAX);
			x86_fxam (code);
			x86_fnstsw (code);
			/* 0x4100 masks C3|C0; fxam sets C0 for NaN/infinity classes */
			x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			x86_pop_reg (code, X86_EAX);

			/* Have to clean up the fp stack before throwing the exception */
			br1 = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);

			x86_fstp (code, 0);			
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");

			x86_patch (br1, code);
			break;
		}
		case OP_TLS_GET: {
			/* load a thread-local slot (inst_offset) into dreg */
			code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
			break;
		}
		case OP_MEMORY_BARRIER: {
			/* Not needed on x86 */
			break;
		}
		case OP_ATOMIC_ADD_I4: {
			/* lock xadd [basereg + offset], dreg: dreg receives the OLD
			 * value of the memory word. */
			int dreg = ins->dreg;

			/* xadd would overwrite the base register with the old value,
			 * so borrow sreg2 as a scratch destination in that case */
			if (dreg == ins->inst_basereg) {
				x86_push_reg (code, ins->sreg2);
				dreg = ins->sreg2;
			} 
			
			/* load the addend into the xadd source/destination */
			if (dreg != ins->sreg2)
				x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);

			x86_prefix (code, X86_LOCK_PREFIX);
			x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);

			/* copy the result back and restore the borrowed register */
			if (dreg != ins->dreg) {
				x86_mov_reg_reg (code, ins->dreg, dreg, 4);
				x86_pop_reg (code, dreg);
			}

			break;
		}
		case OP_ATOMIC_ADD_NEW_I4: {
			/* like ATOMIC_ADD_I4 but the result is the NEW value
			 * (old value + addend) */
			int dreg = ins->dreg;

			/* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
			/* pick a scratch register distinct from sreg2 and basereg */
			if (ins->sreg2 == dreg) {
				if (dreg == X86_EBX) {
					dreg = X86_EDI;
					if (ins->inst_basereg == X86_EDI)
						dreg = X86_ESI;
				} else {
					dreg = X86_EBX;
					if (ins->inst_basereg == X86_EBX)
						dreg = X86_EDI;
				}
			} else if (ins->inst_basereg == dreg) {
				if (dreg == X86_EBX) {
					dreg = X86_EDI;
					if (ins->sreg2 == X86_EDI)
						dreg = X86_ESI;
				} else {
					dreg = X86_EBX;
					if (ins->sreg2 == X86_EBX)
						dreg = X86_EDI;
				}
			}

			if (dreg != ins->dreg) {
				x86_push_reg (code, dreg);
			}

			x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
			x86_prefix (code, X86_LOCK_PREFIX);
			x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
			/* dreg contains the old value, add with sreg2 value */
			x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
			
			if (ins->dreg != dreg) {
				x86_mov_reg_reg (code, ins->dreg, dreg, 4);
				x86_pop_reg (code, dreg);
			}

			break;
		}
		case OP_ATOMIC_EXCHANGE_I4:
		case OP_ATOMIC_CAS_IMM_I4: {
			guchar *br[2];
			int sreg2 = ins->sreg2;
			int breg = ins->inst_basereg;

			/* cmpxchg uses eax as comperand, need to make sure we can use it
			 * hack to overcome limits in x86 reg allocator 
			 * (req: dreg == eax and sreg2 != eax and breg != eax) 
			 */
			g_assert (ins->dreg == X86_EAX);
			
			/* We need the EAX reg for the cmpxchg */
			if (ins->sreg2 == X86_EAX) {
				x86_push_reg (code, X86_EDX);
				x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
				sreg2 = X86_EDX;
			}

			if (breg == X86_EAX) {
				x86_push_reg (code, X86_ESI);
				x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
				breg = X86_ESI;
			}

			if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
				/* single-shot CAS: comparand is the immediate */
				x86_mov_reg_imm (code, X86_EAX, ins->backend.data);

				x86_prefix (code, X86_LOCK_PREFIX);
				x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
			} else {
				/* unconditional exchange: retry the cmpxchg until the
				 * comparand (the value we last read) still matches */
				x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);

				br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
				x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
				br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
				x86_patch (br [1], br [0]);
			}

			/* restore any register we had to spill above */
			if (breg != ins->inst_basereg)
				x86_pop_reg (code, X86_ESI);

			if (ins->sreg2 != sreg2)
				x86_pop_reg (code, X86_EDX);

			break;
		}
#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* SIMD intrinsics: thin dispatches to the SSE emitter macros.
		 * Binary ops take (sreg1, sreg2) with sreg1 doubling as the
		 * destination; unary ops take (dreg, sreg1). */

		/* packed single-precision arithmetic */
		case OP_ADDPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_DIVPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
			break;
		case OP_MULPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
			break;
		case OP_SUBPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_MAXPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
			break;
		case OP_MINPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPPS:
			/* inst_c0 is the CMPPS predicate (0..7) */
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
			x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_ANDPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_ANDNPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
			break;
		case OP_ORPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_XORPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
			break;
		/* unary packed-single ops */
		case OP_SQRTPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
			break;
		case OP_RSQRTPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
			break;
		case OP_RCPPS:
			x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
			break;
		/* SSE3 horizontal/addsub ops use the sd (0xF2-prefixed) encoding */
		case OP_ADDSUBPS:
			x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
			break;
		case OP_HADDPS:
			x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
			break;
		case OP_HSUBPS:
			x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
			break;
		case OP_DUPPS_HIGH:
			x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
			break;
		case OP_DUPPS_LOW:
			x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
			break;

		/* word/dword shuffles; inst_c0 is the immediate shuffle mask */
		case OP_PSHUFLEW_HIGH:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
			break;
		case OP_PSHUFLEW_LOW:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
			break;
		case OP_PSHUFLED:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
			x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
			break;

		/* packed double-precision arithmetic */
		case OP_ADDPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_DIVPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
			break;
		case OP_MULPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
			break;
		case OP_SUBPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_MAXPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
			break;
		case OP_MINPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPPD:
			g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
			x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
			break;
		case OP_ANDPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_ANDNPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
			break;
		case OP_ORPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_XORPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
			break;
		case OP_ADDSUBPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
			break;
		case OP_HADDPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
			break;
		case OP_HSUBPD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
			break;
		case OP_DUPPD:
			x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
			break;
			
		case OP_EXTRACT_MASK:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
			break;
	
		/* packed integer bitwise ops */
		case OP_PAND:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
			break;
		case OP_POR:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
			break;
		case OP_PXOR:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
			break;

		/* packed integer add (wrap-around, per element size) */
		case OP_PADDB:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDW:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
			break;
		case OP_PADDQ:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
			break;

		/* packed integer subtract */
		case OP_PSUBB:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBW:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBD:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
			break;
		case OP_PSUBQ:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
			break;

		/* unsigned packed max (PMAXUW is SSE4.1) */
		case OP_PMAXB_UN:
			x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
			break;
3976                 case OP_PMAXW_UN:
3977                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
3978                         break;
3979                 case OP_PMAXD_UN:
3980                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
3981                         break;
3982                 
3983                 case OP_PMAXB:
3984                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
3985                         break;
3986                 case OP_PMAXW:
3987                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
3988                         break;
3989                 case OP_PMAXD:
3990                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
3991                         break;
3992
3993                 case OP_PAVGB_UN:
3994                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
3995                         break;
3996                 case OP_PAVGW_UN:
3997                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
3998                         break;
3999
4000                 case OP_PMINB_UN:
4001                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
4002                         break;
4003                 case OP_PMINW_UN:
4004                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
4005                         break;
4006                 case OP_PMIND_UN:
4007                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
4008                         break;
4009
4010                 case OP_PMINB:
4011                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
4012                         break;
4013                 case OP_PMINW:
4014                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
4015                         break;
4016                 case OP_PMIND:
4017                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
4018                         break;
4019
4020                 case OP_PCMPEQB:
4021                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
4022                         break;
4023                 case OP_PCMPEQW:
4024                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
4025                         break;
4026                 case OP_PCMPEQD:
4027                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
4028                         break;
4029                 case OP_PCMPEQQ:
4030                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
4031                         break;
4032
4033                 case OP_PCMPGTB:
4034                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
4035                         break;
4036                 case OP_PCMPGTW:
4037                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
4038                         break;
4039                 case OP_PCMPGTD:
4040                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
4041                         break;
4042                 case OP_PCMPGTQ:
4043                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
4044                         break;
4045
4046                 case OP_PSUM_ABS_DIFF:
4047                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
4048                         break;
4049
4050                 case OP_UNPACK_LOWB:
4051                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
4052                         break;
4053                 case OP_UNPACK_LOWW:
4054                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
4055                         break;
4056                 case OP_UNPACK_LOWD:
4057                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
4058                         break;
4059                 case OP_UNPACK_LOWQ:
4060                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
4061                         break;
4062                 case OP_UNPACK_LOWPS:
4063                         x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4064                         break;
4065                 case OP_UNPACK_LOWPD:
4066                         x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
4067                         break;
4068
4069                 case OP_UNPACK_HIGHB:
4070                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
4071                         break;
4072                 case OP_UNPACK_HIGHW:
4073                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
4074                         break;
4075                 case OP_UNPACK_HIGHD:
4076                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
4077                         break;
4078                 case OP_UNPACK_HIGHQ:
4079                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
4080                         break;
4081                 case OP_UNPACK_HIGHPS:
4082                         x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4083                         break;
4084                 case OP_UNPACK_HIGHPD:
4085                         x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
4086                         break;
4087
4088                 case OP_PACKW:
4089                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
4090                         break;
4091                 case OP_PACKD:
4092                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
4093                         break;
4094                 case OP_PACKW_UN:
4095                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
4096                         break;
4097                 case OP_PACKD_UN:
4098                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
4099                         break;
4100
4101                 case OP_PADDB_SAT_UN:
4102                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
4103                         break;
4104                 case OP_PSUBB_SAT_UN:
4105                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
4106                         break;
4107                 case OP_PADDW_SAT_UN:
4108                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
4109                         break;
4110                 case OP_PSUBW_SAT_UN:
4111                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
4112                         break;
4113
4114                 case OP_PADDB_SAT:
4115                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
4116                         break;
4117                 case OP_PSUBB_SAT:
4118                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
4119                         break;
4120                 case OP_PADDW_SAT:
4121                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
4122                         break;
4123                 case OP_PSUBW_SAT:
4124                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4125                         break;
4126                         
4127                 case OP_PMULW:
4128                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4129                         break;
4130                 case OP_PMULD:
4131                         x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4132                         break;
4133                 case OP_PMULQ:
4134                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4135                         break;
4136                 case OP_PMULW_HIGH_UN:
4137                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4138                         break;
4139                 case OP_PMULW_HIGH:
4140                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4141                         break;
4142
4143                 case OP_PSHRW:
4144                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4145                         break;
4146                 case OP_PSHRW_REG:
4147                         x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4148                         break;
4149
4150                 case OP_PSARW:
4151                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4152                         break;
4153                 case OP_PSARW_REG:
4154                         x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4155                         break;
4156
4157                 case OP_PSHLW:
4158                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4159                         break;
4160                 case OP_PSHLW_REG:
4161                         x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4162                         break;
4163
4164                 case OP_PSHRD:
4165                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4166                         break;
4167                 case OP_PSHRD_REG:
4168                         x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4169                         break;
4170
4171                 case OP_PSARD:
4172                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4173                         break;
4174                 case OP_PSARD_REG:
4175                         x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4176                         break;
4177
4178                 case OP_PSHLD:
4179                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4180                         break;
4181                 case OP_PSHLD_REG:
4182                         x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4183                         break;
4184
4185                 case OP_PSHRQ:
4186                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4187                         break;
4188                 case OP_PSHRQ_REG:
4189                         x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4190                         break;
4191
4192                 case OP_PSHLQ:
4193                         x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4194                         break;
4195                 case OP_PSHLQ_REG:
4196                         x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4197                         break;          
4198                         
4199                 case OP_ICONV_TO_X:
4200                         x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4201                         break;
4202                 case OP_EXTRACT_I4:
4203                         x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4204                         break;
4205                 case OP_STOREX_MEMBASE_REG:
4206                 case OP_STOREX_MEMBASE:
4207                         x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4208                         break;
4209                 case OP_LOADX_MEMBASE:
4210                         x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4211                         break;
4212                 case OP_LOADX_ALIGNED_MEMBASE:
4213                         x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4214                         break;
4215                 case OP_STOREX_ALIGNED_MEMBASE_REG:
4216                         x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4217                         break;
4218                 case OP_STOREX_NTA_MEMBASE_REG:
4219                         x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4220                         break;
4221                 case OP_PREFETCH_MEMBASE:
4222                         x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4223
4224                         break;
4225                 case OP_XMOVE:
4226                         /*FIXME the peephole pass should have killed this*/
4227                         if (ins->dreg != ins->sreg1)
4228                                 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4229                         break;          
4230                 case OP_XZERO:
4231                         x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4232                         break;
4233                 case OP_ICONV_TO_R8_RAW:
4234                         x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4235                         x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4236                         break;
4237
4238                 case OP_FCONV_TO_R8_X:
4239                         x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4240                         x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4241                         break;
4242
4243                 case OP_XCONV_R8_TO_I4:
4244                         x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4245                         switch (ins->backend.source_opcode) {
4246                         case OP_FCONV_TO_I1:
4247                                 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4248                                 break;
4249                         case OP_FCONV_TO_U1:
4250                                 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4251                                 break;
4252                         case OP_FCONV_TO_I2:
4253                                 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4254                                 break;
4255                         case OP_FCONV_TO_U2:
4256                                 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4257                                 break;
4258                         }                       
4259                         break;
4260 #endif
4261                 default:
4262                         g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4263                         g_assert_not_reached ();
4264                 }
4265
4266                 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4267                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4268                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4269                         g_assert_not_reached ();
4270                 }
4271                
4272                 cpos += max_len;
4273         }
4274
4275         cfg->code_len = code - cfg->native_code;
4276 }
4277
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering arch-specific low-level icalls with the runtime.
 * The x86 backend currently has nothing to register, so this is
 * intentionally a no-op.
 */
4278 void
4279 mono_arch_register_lowlevel_calls (void)
4280 {
4281 }
4282
/*
 * mono_arch_patch_code:
 *
 *   Walk the MonoJumpInfo list JI and apply each resolved patch target to
 * the native code buffer CODE.  RUN_CCTORS == FALSE means we are compiling
 * AOT; in that mode only intra-method (basic block / label) targets are
 * patched here, the remaining patch kinds are skipped (presumably resolved
 * later by the AOT loading machinery — see the in-loop comment).
 */
4283 void
4284 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4285 {
4286         MonoJumpInfo *patch_info;
4287         gboolean compile_aot = !run_cctors;
4288
4289         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
                /* ip = address inside the native code buffer where the patch applies */
4290                 unsigned char *ip = patch_info->ip.i + code;
4291                 const unsigned char *target;
4292
4293                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4294
4295                 if (compile_aot) {
4296                         switch (patch_info->type) {
4297                         case MONO_PATCH_INFO_BB:
4298                         case MONO_PATCH_INFO_LABEL:
4299                                 break;
4300                         default:
4301                                 /* No need to patch these */
4302                                 continue;
4303                         }
4304                 }
4305
4306                 switch (patch_info->type) {
4307                 case MONO_PATCH_INFO_IP:
                        /* Plain pointer slot: store the target address directly at ip */
4308                         *((gconstpointer *)(ip)) = target;
4309                         break;
4310                 case MONO_PATCH_INFO_CLASS_INIT: {
                        /* NOTE: this local 'code' intentionally shadows the parameter;
                         * re-emit the call opcode at ip before patching its target */
4311                         guint8 *code = ip;
4312                         /* Might already been changed to a nop */
4313                         x86_call_code (code, 0);
4314                         x86_patch (ip, target);
4315                         break;
4316                 }
4317                 case MONO_PATCH_INFO_ABS:
4318                 case MONO_PATCH_INFO_METHOD:
4319                 case MONO_PATCH_INFO_METHOD_JUMP:
4320                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4321                 case MONO_PATCH_INFO_BB:
4322                 case MONO_PATCH_INFO_LABEL:
4323                 case MONO_PATCH_INFO_RGCTX_FETCH:
4324                 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4325                 case MONO_PATCH_INFO_MONITOR_ENTER:
4326                 case MONO_PATCH_INFO_MONITOR_EXIT:
                        /* Branch/call patches: fix up the instruction at ip to reach target */
4327                         x86_patch (ip, target);
4328                         break;
4329                 case MONO_PATCH_INFO_NONE:
4330                         break;
4331                 default: {
                        /* Data patches: the pointer lives at an instruction-specific offset
                         * past ip, computed by mono_arch_get_patch_offset () */
4332                         guint32 offset = mono_arch_get_patch_offset (ip);
4333                         *((gconstpointer *)(ip + offset)) = target;
4334                         break;
4335                 }
4336                 }
4337         }
4338 }
4339
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the per-method native prologue into a freshly allocated code
 * buffer: standard frame setup (push %ebp; mov %ebp,%esp), optional thread
 * attach / appdomain switch for native-to-managed wrappers, either LMF
 * construction or plain callee-saved register saves, stack-frame
 * allocation (with page-by-page probing where required), optional stack
 * realignment, rgctx spilling, tracing instrumentation, and loading of
 * arguments that were allocated to registers.  Returns the code pointer
 * just past the emitted prologue.
 */
4340 guint8 *
4341 mono_arch_emit_prolog (MonoCompile *cfg)
4342 {
4343         MonoMethod *method = cfg->method;
4344         MonoBasicBlock *bb;
4345         MonoMethodSignature *sig;
4346         MonoInst *inst;
4347         int alloc_size, pos, max_offset, i;
4348         guint8 *code;
4349
        /* Initial buffer estimate: 4x the IL size, with a 10K floor */
4350         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 10240);
4351
4352         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4353                 cfg->code_size += 512;
4354
4355         code = cfg->native_code = g_malloc (cfg->code_size);
4356
        /* Standard frame: push %ebp; mov %ebp, %esp */
4357         x86_push_reg (code, X86_EBP);
4358         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4359
4360         alloc_size = cfg->stack_offset;
        /* pos tracks bytes already pushed below the frame pointer */
4361         pos = 0;
4362
4363         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4364                 /* Might need to attach the thread to the JIT  or change the domain for the callback */
4365                 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
                        /* Fast path: compare the appdomain TLS slot against cfg->domain and
                         * check the lmf TLS slot; only call mono_jit_thread_attach () when
                         * the thread is not already attached to the right domain. */
4366                         guint8 *buf, *no_domain_branch;
4367
4368                         code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4369                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4370                         no_domain_branch = code;
4371                         x86_branch8 (code, X86_CC_NE, 0, 0);
4372                         code = mono_x86_emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4373                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
4374                         buf = code;
4375                         x86_branch8 (code, X86_CC_NE, 0, 0);
4376                         x86_patch (no_domain_branch, code);
4377                         x86_push_imm (code, cfg->domain);
4378                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4379                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4380                         x86_patch (buf, code);
4381 #ifdef PLATFORM_WIN32
4382                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4383                         /* FIXME: Add a separate key for LMF to avoid this */
4384                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4385 #endif
4386                 }
4387                 else {
                        /* Slow path: no usable TLS offsets, always call the attach helper */
4388                         g_assert (!cfg->compile_aot);
4389                         x86_push_imm (code, cfg->domain);
4390                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4391                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4392                 }
4393         }
4394
4395         if (method->save_lmf) {
                /* Build a MonoLMF on the stack via the push sequence below;
                 * the push ordering must match the MonoLMF field layout. */
4396                 pos += sizeof (MonoLMF);
4397
4398                 /* save the current IP */
4399                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4400                 x86_push_imm_template (code);
4401
4402                 /* save all caller saved regs */
4403                 x86_push_reg (code, X86_EBP);
4404                 x86_push_reg (code, X86_ESI);
4405                 x86_push_reg (code, X86_EDI);
4406                 x86_push_reg (code, X86_EBX);
4407
4408                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4409                         /*
4410                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
4411                          * through the mono_lmf_addr TLS variable.
4412                          */
4413                         /* %eax = previous_lmf */
4414                         x86_prefix (code, X86_GS_PREFIX);
4415                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
4416                         /* skip esp + method_info + lmf */
4417                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
4418                         /* push previous_lmf */
4419                         x86_push_reg (code, X86_EAX);
4420                         /* new lmf = ESP */
4421                         x86_prefix (code, X86_GS_PREFIX);
4422                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
4423                 } else {
4424                         /* get the address of lmf for the current thread */
4425                         /* 
4426                          * This is performance critical so we try to use some tricks to make
4427                          * it fast.
4428                          */                                                                        
4429
4430                         if (lmf_addr_tls_offset != -1) {
4431                                 /* Load lmf quicky using the GS register */
4432                                 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
4433 #ifdef PLATFORM_WIN32
4434                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4435                                 /* FIXME: Add a separate key for LMF to avoid this */
4436                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4437 #endif
4438                         } else {
4439                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4440                         }
4441
4442                         /* Skip esp + method info */
4443                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
4444
4445                         /* push lmf */
4446                         x86_push_reg (code, X86_EAX); 
4447                         /* push *lfm (previous_lmf) */
4448                         x86_push_membase (code, X86_EAX, 0);
4449                         /* *(lmf) = ESP */
4450                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4451                 }
4452         } else {
                /* No LMF: just save the callee-saved registers this method uses */
4453
4454                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4455                         x86_push_reg (code, X86_EBX);
4456                         pos += 4;
4457                 }
4458
4459                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4460                         x86_push_reg (code, X86_EDI);
4461                         pos += 4;
4462                 }
4463
4464                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4465                         x86_push_reg (code, X86_ESI);
4466                         pos += 4;
4467                 }
4468         }
4469
        /* The pushes above already consumed 'pos' bytes of the frame */
4470         alloc_size -= pos;
4471
4472         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
4473         if (mono_do_x86_stack_align) {
4474                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
4475                 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
4476                 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
4477         }
4478
4479         if (alloc_size) {
4480                 /* See mono_emit_stack_alloc */
4481 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
                /* Probe the stack one 4K page at a time so each guard page is
                 * touched in order while growing the frame */
4482                 guint32 remaining_size = alloc_size;
4483                 while (remaining_size >= 0x1000) {
4484                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4485                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4486                         remaining_size -= 0x1000;
4487                 }
4488                 if (remaining_size)
4489                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4490 #else
4491                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4492 #endif
4493         }
4494
        /* Wrappers entered from native code cannot trust the caller's
         * alignment, so force-align %esp */
4495         if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
4496                         cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
4497                 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
4498         }
4499
4500 #if DEBUG_STACK_ALIGNMENT
4501         /* check the stack is aligned */
4502         if (method->wrapper_type == MONO_WRAPPER_NONE) {
4503                 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
4504                 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
4505                 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4506                 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
4507                 x86_breakpoint (code);
4508         }
4509 #endif
4510
4511         /* compute max_offset in order to use short forward jumps */
4512         max_offset = 0;
4513         if (cfg->opt & MONO_OPT_BRANCH) {
4514                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4515                         MonoInst *ins;
4516                         bb->max_offset = max_offset;
4517
4518                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4519                                 max_offset += 6;
4520                         /* max alignment for loops */
4521                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4522                                 max_offset += LOOP_ALIGNMENT;
4523
4524                         MONO_BB_FOR_EACH_INS (bb, ins) {
4525                                 if (ins->opcode == OP_LABEL)
4526                                         ins->inst_c1 = max_offset;
4527                                 
4528                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
4529                         }
4530                 }
4531         }
4532
4533         /* store runtime generic context */
4534         if (cfg->rgctx_var) {
4535                 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
4536
4537                 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
4538         }
4539
        /* Emit enter-method tracing hook if tracing is enabled for this method */
4540         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4541                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4542
4543         /* load arguments allocated to register from the stack */
4544         sig = mono_method_signature (method);
4545         pos = 0;
4546
4547         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4548                 inst = cfg->args [pos];
4549                 if (inst->opcode == OP_REGVAR) {
4550                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4551                         if (cfg->verbose_level > 2)
4552                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4553                 }
4554                 pos++;
4555         }
4556
4557         cfg->code_len = code - cfg->native_code;
4558
        /* The prologue must fit in the initial estimate; growing happens later */
4559         g_assert (cfg->code_len < cfg->code_size);
4560
4561         return code;
4562 }
4563
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog into cfg->native_code: restore the previous LMF
 * (if the method saved one) or pop the callee saved registers, load value
 * type results returned in registers, then emit leave + ret.  Grows the
 * native code buffer first so the epilog is guaranteed to fit.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;

	/* Make sure there is enough room for the epilog, keeping 16 bytes slack. */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with OP_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		/* the MonoLMF structure sits right below the saved EBP */
		gint32 lmf_offset = -sizeof (MonoLMF);

		/* check if we need to restore protection of the stack after a stack overflow */
		if (mono_get_jit_tls_offset () != -1) {
			guint8 *patch;
			code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
			/* we load the value in a separate instruction: this mechanism may be
			 * used later as a safer way to do thread interruption
			 */
			x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
			x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
			patch = code;
			x86_branch8 (code, X86_CC_Z, 0, FALSE);
			/* note that the call trampoline will preserve eax/edx */
			x86_call_reg (code, X86_ECX);
			x86_patch (patch, code);
		} else {
			/* FIXME: maybe save the jit tls in the prolog */
		}
		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* reg = previous_lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* lmf = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
		} else {
			/* Find a spare register: EDX holds the upper half of an I8/U8
			 * return value, so fall back to EDI (and mark it used so it
			 * gets restored below) in that case. */
			switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
				prev_lmf_reg = X86_EDI;
				cfg->used_int_regs |= (1 << X86_EDI);
				break;
			default:
				prev_lmf_reg = X86_EDX;
				break;
			}

			/* reg = previous_lmf */
			x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* ecx = lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

			/* *(lmf) = previous_lmf */
			x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
		}

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* compute the (negative) offset of the saved registers below EBP */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* pop in reverse order of the prolog pushes */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	/* Under stdcall the callee pops its own arguments; a vtype returned on
	 * the stack also requires popping the hidden return buffer argument. */
	if (CALLCONV_IS_STDCALL (sig)) {
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4727
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line code which throws the exceptions recorded in
 * cfg->patch_info, and patch the branches in the method body to jump to it.
 * Throw sequences for the same exception class are shared (up to 16 classes).
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* caches used to share throw sequences between identical exception classes */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* make the branch in the method body jump here */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* reuse it: only push this site's IP offset, then jump into it */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				size = 5 + 5;

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					/* placeholder immediate, rewritten below once the offset is known */
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				/* rewrite the first push with the now-known throw IP offset */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				/* pad any leftover placeholder bytes with nops */
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4833
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for the given code region.  Nothing to do
 * on x86 (see the comment below); the function exists to satisfy the
 * arch-independent interface.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
4839
/*
 * mono_arch_flush_register_windows:
 *
 *   No-op on x86; this hook exists for architectures with register windows
 * (e.g. SPARC) and is part of the arch-independent interface.
 */
void
mono_arch_flush_register_windows (void)
{
}
4844
/*
 * mono_arch_is_inst_imm:
 *
 *   Return whether IMM can be used as an inline immediate operand.
 * The x86 backend always accepts the immediate here.
 */
gboolean 
mono_arch_is_inst_imm (gint64 imm)
{
	return TRUE;
}
4850
4851 /*
4852  * Support for fast access to the thread-local lmf structure using the GS
4853  * segment register on NPTL + kernel 2.6.x.
4854  */
4855
4856 static gboolean tls_offset_inited = FALSE;
4857
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Cache the TLS offsets/keys used for fast inline TLS access in the
 * generated code.  Can be disabled by setting the MONO_NO_TLS environment
 * variable.  On windows the cached values are TlsAlloc () keys, elsewhere
 * they are TLS offsets (see the declarations at the top of the file).
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
			/* detect Xen; optimize_for_xen disables some segment-prefix
			 * TLS fast paths (see mono_arch_emit_epilog) */
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			/* NOTE(review): tls_offset_inited is only set on this non-win32
			 * path; on win32 the function re-initializes on every call, which
			 * matches the comment in the win32 branch above. */
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}		
}
4892
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Free arch-specific per-thread JIT TLS data.  The x86 backend allocates
 * none, so this is a no-op.
 */
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}
4897
4898 void
4899 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4900 {
4901         MonoCallInst *call = (MonoCallInst*)inst;
4902         CallInfo *cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, inst->signature, FALSE);
4903
4904         /* add the this argument */
4905         if (this_reg != -1) {
4906                 if (cinfo->args [0].storage == ArgInIReg) {
4907                         MonoInst *this;
4908                         MONO_INST_NEW (cfg, this, OP_MOVE);
4909                         this->type = this_type;
4910                         this->sreg1 = this_reg;
4911                         this->dreg = mono_regstate_next_int (cfg->rs);
4912                         mono_bblock_add_inst (cfg->cbb, this);
4913
4914                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4915                 }
4916                 else {
4917                         MonoInst *this;
4918                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4919                         this->type = this_type;
4920                         this->sreg1 = this_reg;
4921                         mono_bblock_add_inst (cfg->cbb, this);
4922                 }
4923         }
4924
4925         if (vt_reg != -1) {
4926                 MonoInst *vtarg;
4927
4928                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4929                         /*
4930                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4931                          * the stack. Save the address here, so the call instruction can
4932                          * access it.
4933                          */
4934                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4935                         vtarg->inst_destbasereg = X86_ESP;
4936                         vtarg->inst_offset = inst->stack_usage;
4937                         vtarg->sreg1 = vt_reg;
4938                         mono_bblock_add_inst (cfg->cbb, vtarg);
4939                 }
4940                 else if (cinfo->ret.storage == ArgInIReg) {
4941                         /* The return address is passed in a register */
4942                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4943                         vtarg->sreg1 = vt_reg;
4944                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4945                         mono_bblock_add_inst (cfg->cbb, vtarg);
4946
4947                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4948                 } else {
4949                         MonoInst *vtarg;
4950                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4951                         vtarg->type = STACK_MP;
4952                         vtarg->sreg1 = vt_reg;
4953                         mono_bblock_add_inst (cfg->cbb, vtarg);
4954                 }
4955         }
4956 }
4957
4958 #ifdef MONO_ARCH_HAVE_IMT
4959
4960 // Linear handler, the bsearch head compare is shorter
4961 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
4962 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
4963 //        x86_patch(ins,target)
4964 //[1 + 5] x86_jump_mem(inst,mem)
4965
4966 #define CMP_SIZE 6
4967 #define BR_SMALL_SIZE 2
4968 #define BR_LARGE_SIZE 5
4969 #define JUMP_IMM_SIZE 6
4970 #define ENABLE_WRONG_METHOD_CHECK 0
4971
4972 static int
4973 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4974 {
4975         int i, distance = 0;
4976         for (i = start; i < target; ++i)
4977                 distance += imt_entries [i]->chunk_size;
4978         return distance;
4979 }
4980
4981 /*
4982  * LOCKING: called with the domain lock held
4983  */
4984 gpointer
4985 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
4986         gpointer fail_tramp)
4987 {
4988         int i;
4989         int size = 0;
4990         guint8 *code, *start;
4991
4992         for (i = 0; i < count; ++i) {
4993                 MonoIMTCheckItem *item = imt_entries [i];
4994                 if (item->is_equals) {
4995                         if (item->check_target_idx) {
4996                                 if (!item->compare_done)
4997                                         item->chunk_size += CMP_SIZE;
4998                                 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4999                         } else {
5000                                 if (fail_tramp) {
5001                                         item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
5002                                 } else {
5003                                         item->chunk_size += JUMP_IMM_SIZE;
5004 #if ENABLE_WRONG_METHOD_CHECK
5005                                         item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5006 #endif
5007                                 }
5008                         }
5009                 } else {
5010                         item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
5011                         imt_entries [item->check_target_idx]->compare_done = TRUE;
5012                 }
5013                 size += item->chunk_size;
5014         }
5015         if (fail_tramp)
5016                 code = mono_method_alloc_generic_virtual_thunk (domain, size);
5017         else
5018                 code = mono_code_manager_reserve (domain->code_mp, size);
5019         start = code;
5020         for (i = 0; i < count; ++i) {
5021                 MonoIMTCheckItem *item = imt_entries [i];
5022                 item->code_target = code;
5023                 if (item->is_equals) {
5024                         if (item->check_target_idx) {
5025                                 if (!item->compare_done)
5026                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5027                                 item->jmp_code = code;
5028                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5029                                 if (fail_tramp)
5030                                         x86_jump_code (code, item->value.target_code);
5031                                 else
5032                                         x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5033                         } else {
5034                                 if (fail_tramp) {
5035                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5036                                         item->jmp_code = code;
5037                                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
5038                                         x86_jump_code (code, item->value.target_code);
5039                                         x86_patch (item->jmp_code, code);
5040                                         x86_jump_code (code, fail_tramp);
5041                                         item->jmp_code = NULL;
5042                                 } else {
5043                                         /* enable the commented code to assert on wrong method */
5044 #if ENABLE_WRONG_METHOD_CHECK
5045                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5046                                         item->jmp_code = code;
5047                                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
5048 #endif
5049                                         x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5050 #if ENABLE_WRONG_METHOD_CHECK
5051                                         x86_patch (item->jmp_code, code);
5052                                         x86_breakpoint (code);
5053                                         item->jmp_code = NULL;
5054 #endif
5055                                 }
5056                         }
5057                 } else {
5058                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5059                         item->jmp_code = code;
5060                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5061                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5062                         else
5063                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5064                 }
5065         }
5066         /* patch the branches to get to the target items */
5067         for (i = 0; i < count; ++i) {
5068                 MonoIMTCheckItem *item = imt_entries [i];
5069                 if (item->jmp_code) {
5070                         if (item->check_target_idx) {
5071                                 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5072                         }
5073                 }
5074         }
5075
5076         if (!fail_tramp)
5077                 mono_stats.imt_thunks_size += code - start;
5078         g_assert (code - start <= size);
5079         return start;
5080 }
5081
/*
 * mono_arch_find_imt_method:
 *
 *   Return the interface method stored in the IMT register by the caller.
 * REGS is the saved register state, indexed by hard register number.
 */
MonoMethod*
mono_arch_find_imt_method (gpointer *regs, guint8 *code)
{
	return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
}
5087
/*
 * mono_arch_find_this_argument:
 *
 *   Recover the 'this' argument of the method being called from the stack
 * state saved by the generic trampoline.  REGS is the saved register state,
 * with regs [X86_ESP] holding the stack pointer at trampoline entry.
 */
MonoObject*
mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
{
	MonoMethodSignature *sig = mono_method_signature (method);
	/* NULL mempool: cinfo is heap allocated and must be g_free'd below */
	CallInfo *cinfo = get_call_info (gsctx, NULL, sig, FALSE);
	int this_argument_offset;
	MonoObject *this_argument;

	/* 
	 * this is the offset of the this arg from esp as saved at the start of 
	 * mono_arch_create_trampoline_code () in tramp-x86.c.
	 */
	this_argument_offset = 5;
	/* a vtype returned on the stack pushes a hidden return buffer address
	 * before 'this', shifting it one slot further up */
	if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
		this_argument_offset++;

	this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));

	g_free (cinfo);
	return this_argument;
}
5109 #endif
5110
/*
 * mono_arch_find_static_call_vtable:
 *
 *   Return the vtable stored in the runtime generic context register by the
 * caller.  REGS is the saved register state, indexed by hard register number.
 */
MonoVTable*
mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
{
	return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
}
5116
5117 MonoInst*
5118 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5119 {
5120         MonoInst *ins = NULL;
5121
5122         if (cmethod->klass == mono_defaults.math_class) {
5123                 if (strcmp (cmethod->name, "Sin") == 0) {
5124                         MONO_INST_NEW (cfg, ins, OP_SIN);
5125                         ins->inst_i0 = args [0];
5126                 } else if (strcmp (cmethod->name, "Cos") == 0) {
5127                         MONO_INST_NEW (cfg, ins, OP_COS);
5128                         ins->inst_i0 = args [0];
5129                 } else if (strcmp (cmethod->name, "Tan") == 0) {
5130                         MONO_INST_NEW (cfg, ins, OP_TAN);
5131                         ins->inst_i0 = args [0];
5132                 } else if (strcmp (cmethod->name, "Atan") == 0) {
5133                         MONO_INST_NEW (cfg, ins, OP_ATAN);
5134                         ins->inst_i0 = args [0];
5135                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5136                         MONO_INST_NEW (cfg, ins, OP_SQRT);
5137                         ins->inst_i0 = args [0];
5138                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5139                         MONO_INST_NEW (cfg, ins, OP_ABS);
5140                         ins->inst_i0 = args [0];
5141                 }
5142
5143                 if (cfg->opt & MONO_OPT_CMOV) {
5144                         int opcode = 0;
5145
5146                         if (strcmp (cmethod->name, "Min") == 0) {
5147                                 if (fsig->params [0]->type == MONO_TYPE_I4)
5148                                         opcode = OP_IMIN;
5149                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
5150                                         opcode = OP_IMIN_UN;
5151                         } else if (strcmp (cmethod->name, "Max") == 0) {
5152                                 if (fsig->params [0]->type == MONO_TYPE_I4)
5153                                         opcode = OP_IMAX;
5154                                 else if (fsig->params [0]->type == MONO_TYPE_U4)
5155                                         opcode = OP_IMAX_UN;
5156                         }               
5157
5158                         if (opcode) {
5159                                 MONO_INST_NEW (cfg, ins, opcode);
5160                                 ins->inst_i0 = args [0];
5161                                 ins->inst_i1 = args [1];
5162                         }
5163                 }
5164
5165 #if 0
5166                 /* OP_FREM is not IEEE compatible */
5167                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5168                         MONO_INST_NEW (cfg, ins, OP_FREM);
5169                         ins->inst_i0 = args [0];
5170                         ins->inst_i1 = args [1];
5171                 }
5172 #endif
5173         }
5174
5175         return ins;
5176 }
5177
5178 MonoInst*
5179 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5180 {
5181         MonoInst *ins = NULL;
5182         int opcode = 0;
5183
5184         if (cmethod->klass == mono_defaults.math_class) {
5185                 if (strcmp (cmethod->name, "Sin") == 0) {
5186                         opcode = OP_SIN;
5187                 } else if (strcmp (cmethod->name, "Cos") == 0) {
5188                         opcode = OP_COS;
5189                 } else if (strcmp (cmethod->name, "Tan") == 0) {
5190                         opcode = OP_TAN;
5191                 } else if (strcmp (cmethod->name, "Atan") == 0) {
5192                         opcode = OP_ATAN;
5193                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5194                         opcode = OP_SQRT;
5195                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5196                         opcode = OP_ABS;
5197                 }
5198                 
5199                 if (opcode) {
5200                         MONO_INST_NEW (cfg, ins, opcode);
5201                         ins->type = STACK_R8;
5202                         ins->dreg = mono_alloc_freg (cfg);
5203                         ins->sreg1 = args [0]->dreg;
5204                         MONO_ADD_INS (cfg->cbb, ins);
5205                 }
5206
5207                 if (cfg->opt & MONO_OPT_CMOV) {
5208                         int opcode = 0;
5209
5210                         if (strcmp (cmethod->name, "Min") == 0) {
5211                                 if (fsig->params [0]->type == MONO_TYPE_I4)
5212                                         opcode = OP_IMIN;
5213                         } else if (strcmp (cmethod->name, "Max") == 0) {
5214                                 if (fsig->params [0]->type == MONO_TYPE_I4)
5215                                         opcode = OP_IMAX;
5216                         }               
5217
5218                         if (opcode) {
5219                                 MONO_INST_NEW (cfg, ins, opcode);
5220                                 ins->type = STACK_I4;
5221                                 ins->dreg = mono_alloc_ireg (cfg);
5222                                 ins->sreg1 = args [0]->dreg;
5223                                 ins->sreg2 = args [1]->dreg;
5224                                 MONO_ADD_INS (cfg->cbb, ins);
5225                         }
5226                 }
5227
5228 #if 0
5229                 /* OP_FREM is not IEEE compatible */
5230                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5231                         MONO_INST_NEW (cfg, ins, OP_FREM);
5232                         ins->inst_i0 = args [0];
5233                         ins->inst_i1 = args [1];
5234                 }
5235 #endif
5236         }
5237
5238         return ins;
5239 }
5240
5241 gboolean
5242 mono_arch_print_tree (MonoInst *tree, int arity)
5243 {
5244         return 0;
5245 }
5246
/*
 * mono_arch_get_domain_intrinsic:
 *
 *   Would return an OP_TLS_GET instruction loading the current appdomain
 * straight from its TLS slot, avoiding a call to mono_domain_get ().
 *
 * NOTE(review): the intrinsic is currently disabled — the unconditional
 * return below makes the TLS fast path underneath unreachable. The dead
 * code is apparently kept so the intrinsic can be re-enabled later;
 * confirm before removing it.
 */
MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
{
	MonoInst* ins;

	return NULL;

	/* Unreachable from here on (see note above) */
	if (appdomain_tls_offset == -1)
		return NULL;

	MONO_INST_NEW (cfg, ins, OP_TLS_GET);
	ins->inst_offset = appdomain_tls_offset;
	return ins;
}
5260
5261 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5262 {
5263         MonoInst* ins;
5264
5265         if (thread_tls_offset == -1)
5266                 return NULL;
5267
5268         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5269         ins->inst_offset = thread_tls_offset;
5270         return ins;
5271 }
5272
5273 guint32
5274 mono_arch_get_patch_offset (guint8 *code)
5275 {
5276         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
5277                 return 2;
5278         else if ((code [0] == 0xba))
5279                 return 1;
5280         else if ((code [0] == 0x68))
5281                 /* push IMM */
5282                 return 1;
5283         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
5284                 /* push <OFFSET>(<REG>) */
5285                 return 2;
5286         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
5287                 /* call *<OFFSET>(<REG>) */
5288                 return 2;
5289         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
5290                 /* fldl <ADDR> */
5291                 return 2;
5292         else if ((code [0] == 0x58) && (code [1] == 0x05))
5293                 /* pop %eax; add <OFFSET>, %eax */
5294                 return 2;
5295         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
5296                 /* pop <REG>; add <OFFSET>, <REG> */
5297                 return 3;
5298         else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
5299                 /* mov <REG>, imm */
5300                 return 1;
5301         else {
5302                 g_assert_not_reached ();
5303                 return -1;
5304         }
5305 }
5306
5307 /**
5308  * mono_breakpoint_clean_code:
5309  *
5310  * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
5311  * breakpoints in the original code, they are removed in the copy.
5312  *
5313  * Returns TRUE if no sw breakpoint was present.
5314  */
5315 gboolean
5316 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
5317 {
5318         int i;
5319         gboolean can_write = TRUE;
5320         /*
5321          * If method_start is non-NULL we need to perform bound checks, since we access memory
5322          * at code - offset we could go before the start of the method and end up in a different
5323          * page of memory that is not mapped or read incorrect data anyway. We zero-fill the bytes
5324          * instead.
5325          */
5326         if (!method_start || code - offset >= method_start) {
5327                 memcpy (buf, code - offset, size);
5328         } else {
5329                 int diff = code - method_start;
5330                 memset (buf, 0, size);
5331                 memcpy (buf + offset - diff, method_start, diff + size - offset);
5332         }
5333         code -= offset;
5334         for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
5335                 int idx = mono_breakpoint_info_index [i];
5336                 guint8 *ptr;
5337                 if (idx < 1)
5338                         continue;
5339                 ptr = mono_breakpoint_info [idx].address;
5340                 if (ptr >= code && ptr < code + size) {
5341                         guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
5342                         can_write = FALSE;
5343                         /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
5344                         buf [ptr - code] = saved_byte;
5345                 }
5346         }
5347         return can_write;
5348 }
5349
/*
 * mono_arch_get_vcall_slot:
 *
 *   Given CODE, the return address of an indirect (vtable/interface/IMT)
 * call, decode the call instruction which ends there and work out which
 * register held the base address and what displacement was used. Returns
 * the register's value taken from REGS and stores the displacement in
 * *DISPLACEMENT, or returns NULL when the site is not a recognized
 * indirect call (e.g. a direct 0xe8 call).
 */
gpointer
mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
{
	guint8 buf [8];
	guint8 reg = 0;
	gint32 disp = 0;

	/* Decode from a copy with any debugger breakpoint bytes removed */
	mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
	code = buf + 8;

	*displacement = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
#ifdef MONO_ARCH_HAVE_IMT
	} else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
		/* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
		 * ba 14 f8 28 08          mov    $0x828f814,%edx
		 * ff 50 fc                call   *0xfffffffc(%eax)
		 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
#endif
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *imm8(<REG>): mod=1, opcode extension 2, 8 bit displacement */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *imm32(<REG>): mod=2, opcode extension 2, 32 bit displacement */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* direct call: not a vtable slot call */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	*displacement = disp;
	return regs [reg];
}
5418
5419 gpointer*
5420 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
5421 {
5422         gpointer vt;
5423         int displacement;
5424         vt = mono_arch_get_vcall_slot (code, regs, &displacement);
5425         if (!vt)
5426                 return NULL;
5427         return (gpointer*)((char*)vt + displacement);
5428 }
5429
5430 gpointer
5431 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
5432                 gssize *regs, guint8 *code)
5433 {
5434         guint32 esp = regs [X86_ESP];
5435         CallInfo *cinfo;
5436         gpointer res;
5437
5438         if (!gsctx && code)
5439                 gsctx = mono_get_generic_context_from_code (code);
5440         cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5441
5442         /*
5443          * The stack looks like:
5444          * <other args>
5445          * <this=delegate>
5446          * <possible vtype return address>
5447          * <return addr>
5448          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
5449          */
5450         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
5451         g_free (cinfo);
5452         return res;
5453 }
5454
5455 #define MAX_ARCH_DELEGATE_PARAMS 10
5456
/*
 * mono_arch_get_delegate_invoke_impl:
 *
 *   Return a small trampoline implementing delegate invocation for SIG.
 * If HAS_TARGET, the trampoline replaces the 'this' argument (the delegate)
 * on the stack with delegate->target and jumps to delegate->method_ptr;
 * otherwise it shifts all arguments up one slot, dropping the delegate,
 * and jumps to delegate->method_ptr. Results are cached (one trampoline
 * for the has_target case, one per parameter count otherwise). Returns
 * NULL for signatures this fast path does not support (too many
 * parameters, struct return, non-regsize parameters).
 */
gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;

	if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
		return NULL;

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		static guint8* cached = NULL;
		if (cached)
			return cached;
		
		start = code = mono_global_codeman_reserve (64);

		/* Replace the this argument with the target */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
		x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
		x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
		x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < 64);

		mono_debug_add_delegate_trampoline (start, code - start);

		/* Publish the code before publishing the cache pointer */
		mono_memory_barrier ();

		cached = start;
	} else {
		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
		int i = 0;
		/* 8 for mov_reg and jump, plus 8 for each parameter */
		int code_reserve = 8 + (sig->param_count * 8);

		/* Only register-sized arguments can be shifted a slot at a time */
		for (i = 0; i < sig->param_count; ++i)
			if (!mono_is_regsize_var (sig->params [i]))
				return NULL;

		code = cache [sig->param_count];
		if (code)
			return code;

		/*
		 * The stack contains:
		 * <args in reverse order>
		 * <delegate>
		 * <return addr>
		 *
		 * and we need:
		 * <args in reverse order>
		 * <return addr>
		 * 
		 * without unbalancing the stack.
		 * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
		 * and leaving original spot of first arg as placeholder in stack so
		 * when callee pops stack everything works.
		 */

		start = code = mono_global_codeman_reserve (code_reserve);

		/* store delegate for access to method_ptr */
		x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);

		/* move args up */
		for (i = 0; i < sig->param_count; ++i) {
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
			x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
		}

		x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < code_reserve);

		mono_debug_add_delegate_trampoline (start, code - start);

		/* Publish the code before publishing the cache pointer */
		mono_memory_barrier ();

		cache [sig->param_count] = start;
	}

	return start;
}
5549
5550 gpointer
5551 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
5552 {
5553         switch (reg) {
5554         case X86_ECX: return (gpointer)ctx->ecx;
5555         case X86_EDX: return (gpointer)ctx->edx;
5556         case X86_EBP: return (gpointer)ctx->ebp;
5557         case X86_ESP: return (gpointer)ctx->esp;
5558         default: return ((gpointer)(&ctx->eax)[reg]);
5559         }
5560 }
5561
5562 #ifdef MONO_ARCH_SIMD_INTRINSICS
5563
5564 static MonoInst*
5565 get_float_to_x_spill_area (MonoCompile *cfg)
5566 {
5567         if (!cfg->fconv_to_r8_x_var) {
5568                 cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
5569                 cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
5570         }       
5571         return cfg->fconv_to_r8_x_var;
5572 }
5573
5574 /*
5575  * Convert all fconv opts that MONO_OPT_SSE2 would get wrong. 
5576  */
5577 void
5578 mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
5579 {
5580         MonoInst *fconv;
5581
5582         int dreg, src_opcode;
5583         g_assert (cfg->new_ir);
5584
5585         if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD))
5586                 return;
5587
5588         switch (src_opcode = ins->opcode) {
5589         case OP_FCONV_TO_I1:
5590         case OP_FCONV_TO_U1:
5591         case OP_FCONV_TO_I2:
5592         case OP_FCONV_TO_U2:
5593         case OP_FCONV_TO_I4:
5594         case OP_FCONV_TO_I:
5595                 break;
5596         default:
5597                 return;
5598         }
5599
5600         /* dreg is the IREG and sreg1 is the FREG */
5601         MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
5602         fconv->klass = NULL; /*FIXME, what can I use here as the Mono.Simd lib might not be loaded yet*/
5603         fconv->sreg1 = ins->sreg1;
5604         fconv->dreg = mono_alloc_ireg (cfg);
5605         fconv->type = STACK_VTYPE;
5606         fconv->backend.spill_var = get_float_to_x_spill_area (cfg);
5607
5608         mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);
5609
5610         dreg = ins->dreg;
5611         NULLIFY_INS (ins);
5612         ins->opcode = OP_XCONV_R8_TO_I4;
5613
5614         ins->klass = mono_defaults.int32_class;
5615         ins->sreg1 = fconv->dreg;
5616         ins->dreg = dreg;
5617         ins->type = STACK_I4;
5618         ins->backend.source_opcode = src_opcode;
5619
5620
5621 }
5622 #endif
5623